Total coverage: 317100 (17%) of 1867434
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 // SPDX-License-Identifier: GPL-2.0+ /* * rti802.c * Comedi driver for Analog Devices RTI-802 board * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1999 Anders Blomdell <anders.blomdell@control.lth.se> */ /* * Driver: rti802 * Description: Analog Devices RTI-802 * Author: Anders Blomdell <anders.blomdell@control.lth.se> * Devices: [Analog Devices] RTI-802 (rti802) * Status: works * * Configuration Options: * [0] - i/o base * [1] - unused * [2,4,6,8,10,12,14,16] - dac#[0-7] 0=two's comp, 1=straight * [3,5,7,9,11,13,15,17] - dac#[0-7] 0=bipolar, 1=unipolar */ #include <linux/module.h> #include <linux/comedi/comedidev.h> /* * Register I/O map */ #define RTI802_SELECT 0x00 #define RTI802_DATALOW 0x01 #define RTI802_DATAHIGH 0x02 struct rti802_private { enum { dac_2comp, dac_straight } dac_coding[8]; const struct comedi_lrange *range_type_list[8]; }; static int rti802_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct rti802_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int i; outb(chan, dev->iobase + RTI802_SELECT); for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; s->readback[chan] = val; /* munge offset binary to two's complement if needed */ if (devpriv->dac_coding[chan] == dac_2comp) val = comedi_offset_munge(s, val); outb(val & 0xff, dev->iobase + RTI802_DATALOW); outb((val >> 8) & 0xff, dev->iobase + RTI802_DATAHIGH); } return insn->n; } static int rti802_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct rti802_private *devpriv; 
struct comedi_subdevice *s; int i; int ret; ret = comedi_request_region(dev, it->options[0], 0x04); if (ret) return ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; ret = comedi_alloc_subdevices(dev, 1); if (ret) return ret; /* Analog Output subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE; s->maxdata = 0xfff; s->n_chan = 8; s->insn_write = rti802_ao_insn_write; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; s->range_table_list = devpriv->range_type_list; for (i = 0; i < 8; i++) { devpriv->dac_coding[i] = (it->options[3 + 2 * i]) ? (dac_straight) : (dac_2comp); devpriv->range_type_list[i] = (it->options[2 + 2 * i]) ? &range_unipolar10 : &range_bipolar10; } return 0; } static struct comedi_driver rti802_driver = { .driver_name = "rti802", .module = THIS_MODULE, .attach = rti802_attach, .detach = comedi_legacy_detach, }; module_comedi_driver(rti802_driver); MODULE_AUTHOR("Comedi https://www.comedi.org"); MODULE_DESCRIPTION("Comedi driver for Analog Devices RTI-802 board"); MODULE_LICENSE("GPL");
3 3 3 3 3 3 3 3 3 16 2 1 3 3 3 3 22 22 1 19 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 // SPDX-License-Identifier: GPL-2.0-or-later /* * Clock domain and sample rate management functions */ #include <linux/bitops.h> #include <linux/init.h> #include <linux/string.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/usb/audio-v2.h> #include <linux/usb/audio-v3.h> #include <sound/core.h> #include <sound/info.h> #include <sound/pcm.h> #include "usbaudio.h" #include "card.h" #include "helper.h" #include "clock.h" #include "quirks.h" union uac23_clock_source_desc { struct uac_clock_source_descriptor v2; struct uac3_clock_source_descriptor v3; }; union uac23_clock_selector_desc { struct uac_clock_selector_descriptor v2; struct uac3_clock_selector_descriptor v3; }; union uac23_clock_multiplier_desc { struct uac_clock_multiplier_descriptor v2; struct uac_clock_multiplier_descriptor v3; }; /* check whether the descriptor bLength has the minimal length */ #define DESC_LENGTH_CHECK(p, proto) \ ((proto) == UAC_VERSION_3 ? \ ((p)->v3.bLength >= sizeof((p)->v3)) : \ ((p)->v2.bLength >= sizeof((p)->v2))) #define GET_VAL(p, proto, field) \ ((proto) == UAC_VERSION_3 ? 
(p)->v3.field : (p)->v2.field) static void *find_uac_clock_desc(struct usb_host_interface *iface, int id, bool (*validator)(void *, int, int), u8 type, int proto) { void *cs = NULL; while ((cs = snd_usb_find_csint_desc(iface->extra, iface->extralen, cs, type))) { if (validator(cs, id, proto)) return cs; } return NULL; } static bool validate_clock_source(void *p, int id, int proto) { union uac23_clock_source_desc *cs = p; if (!DESC_LENGTH_CHECK(cs, proto)) return false; return GET_VAL(cs, proto, bClockID) == id; } static bool validate_clock_selector(void *p, int id, int proto) { union uac23_clock_selector_desc *cs = p; if (!DESC_LENGTH_CHECK(cs, proto)) return false; if (GET_VAL(cs, proto, bClockID) != id) return false; /* additional length check for baCSourceID array (in bNrInPins size) * and two more fields (which sizes depend on the protocol) */ if (proto == UAC_VERSION_3) return cs->v3.bLength >= sizeof(cs->v3) + cs->v3.bNrInPins + 4 /* bmControls */ + 2 /* wCSelectorDescrStr */; else return cs->v2.bLength >= sizeof(cs->v2) + cs->v2.bNrInPins + 1 /* bmControls */ + 1 /* iClockSelector */; } static bool validate_clock_multiplier(void *p, int id, int proto) { union uac23_clock_multiplier_desc *cs = p; if (!DESC_LENGTH_CHECK(cs, proto)) return false; return GET_VAL(cs, proto, bClockID) == id; } #define DEFINE_FIND_HELPER(name, obj, validator, type2, type3) \ static obj *name(struct snd_usb_audio *chip, int id, \ const struct audioformat *fmt) \ { \ struct usb_host_interface *ctrl_intf = \ snd_usb_find_ctrl_interface(chip, fmt->iface); \ return find_uac_clock_desc(ctrl_intf, id, validator, \ fmt->protocol == UAC_VERSION_3 ? 
(type3) : (type2), \ fmt->protocol); \ } DEFINE_FIND_HELPER(snd_usb_find_clock_source, union uac23_clock_source_desc, validate_clock_source, UAC2_CLOCK_SOURCE, UAC3_CLOCK_SOURCE); DEFINE_FIND_HELPER(snd_usb_find_clock_selector, union uac23_clock_selector_desc, validate_clock_selector, UAC2_CLOCK_SELECTOR, UAC3_CLOCK_SELECTOR); DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier, union uac23_clock_multiplier_desc, validate_clock_multiplier, UAC2_CLOCK_MULTIPLIER, UAC3_CLOCK_MULTIPLIER); static int uac_clock_selector_get_val(struct snd_usb_audio *chip, int selector_id, int iface_no) { struct usb_host_interface *ctrl_intf; unsigned char buf; int ret; ctrl_intf = snd_usb_find_ctrl_interface(chip, iface_no); ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC2_CS_CUR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN, UAC2_CX_CLOCK_SELECTOR << 8, snd_usb_ctrl_intf(ctrl_intf) | (selector_id << 8), &buf, sizeof(buf)); if (ret < 0) return ret; return buf; } static int uac_clock_selector_set_val(struct snd_usb_audio *chip, int selector_id, unsigned char pin, int iface_no) { struct usb_host_interface *ctrl_intf; int ret; ctrl_intf = snd_usb_find_ctrl_interface(chip, iface_no); ret = snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), UAC2_CS_CUR, USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT, UAC2_CX_CLOCK_SELECTOR << 8, snd_usb_ctrl_intf(ctrl_intf) | (selector_id << 8), &pin, sizeof(pin)); if (ret < 0) return ret; if (ret != sizeof(pin)) { usb_audio_err(chip, "setting selector (id %d) unexpected length %d\n", selector_id, ret); return -EINVAL; } ret = uac_clock_selector_get_val(chip, selector_id, iface_no); if (ret < 0) return ret; if (ret != pin) { usb_audio_err(chip, "setting selector (id %d) to %x failed (current: %d)\n", selector_id, pin, ret); return -EINVAL; } return ret; } static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip, const struct audioformat *fmt, int source_id) { bool ret = false; int count; unsigned char data; 
struct usb_device *dev = chip->dev; union uac23_clock_source_desc *cs_desc; struct usb_host_interface *ctrl_intf; ctrl_intf = snd_usb_find_ctrl_interface(chip, fmt->iface); cs_desc = snd_usb_find_clock_source(chip, source_id, fmt); if (!cs_desc) return false; if (fmt->protocol == UAC_VERSION_2) { /* * Assume the clock is valid if clock source supports only one * single sample rate, the terminal is connected directly to it * (there is no clock selector) and clock type is internal. * This is to deal with some Denon DJ controllers that always * reports that clock is invalid. */ if (fmt->nr_rates == 1 && (fmt->clock & 0xff) == cs_desc->v2.bClockID && (cs_desc->v2.bmAttributes & 0x3) != UAC_CLOCK_SOURCE_TYPE_EXT) return true; } /* * MOTU MicroBook IIc * Sample rate changes takes more than 2 seconds for this device. Clock * validity request returns false during that period. */ if (chip->usb_id == USB_ID(0x07fd, 0x0004)) { count = 0; while ((!ret) && (count < 50)) { int err; msleep(100); err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_CS_CONTROL_CLOCK_VALID << 8, snd_usb_ctrl_intf(ctrl_intf) | (source_id << 8), &data, sizeof(data)); if (err < 0) { dev_warn(&dev->dev, "%s(): cannot get clock validity for id %d\n", __func__, source_id); return false; } ret = !!data; count++; } } return ret; } static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, const struct audioformat *fmt, int source_id) { int err; unsigned char data; struct usb_device *dev = chip->dev; u32 bmControls; union uac23_clock_source_desc *cs_desc; struct usb_host_interface *ctrl_intf; ctrl_intf = snd_usb_find_ctrl_interface(chip, fmt->iface); cs_desc = snd_usb_find_clock_source(chip, source_id, fmt); if (!cs_desc) return false; if (fmt->protocol == UAC_VERSION_3) bmControls = le32_to_cpu(cs_desc->v3.bmControls); else bmControls = cs_desc->v2.bmControls; /* If a clock source can't tell us whether it's valid, we assume it is */ 
if (!uac_v2v3_control_is_readable(bmControls, UAC2_CS_CONTROL_CLOCK_VALID)) return true; err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_CS_CONTROL_CLOCK_VALID << 8, snd_usb_ctrl_intf(ctrl_intf) | (source_id << 8), &data, sizeof(data)); if (err < 0) { dev_warn(&dev->dev, "%s(): cannot get clock validity for id %d\n", __func__, source_id); return false; } if (data) return true; else return uac_clock_source_is_valid_quirk(chip, fmt, source_id); } static int __uac_clock_find_source(struct snd_usb_audio *chip, const struct audioformat *fmt, int entity_id, unsigned long *visited, bool validate) { union uac23_clock_source_desc *source; union uac23_clock_selector_desc *selector; union uac23_clock_multiplier_desc *multiplier; int ret, i, cur, err, pins, clock_id; const u8 *sources; int proto = fmt->protocol; bool readable, writeable; u32 bmControls; entity_id &= 0xff; if (test_and_set_bit(entity_id, visited)) { usb_audio_warn(chip, "%s(): recursive clock topology detected, id %d.\n", __func__, entity_id); return -EINVAL; } /* first, see if the ID we're looking at is a clock source already */ source = snd_usb_find_clock_source(chip, entity_id, fmt); if (source) { entity_id = GET_VAL(source, proto, bClockID); if (validate && !uac_clock_source_is_valid(chip, fmt, entity_id)) { usb_audio_err(chip, "clock source %d is not valid, cannot use\n", entity_id); return -ENXIO; } return entity_id; } selector = snd_usb_find_clock_selector(chip, entity_id, fmt); if (selector) { pins = GET_VAL(selector, proto, bNrInPins); clock_id = GET_VAL(selector, proto, bClockID); sources = GET_VAL(selector, proto, baCSourceID); cur = 0; if (proto == UAC_VERSION_3) bmControls = le32_to_cpu(*(__le32 *)(&selector->v3.baCSourceID[0] + pins)); else bmControls = *(__u8 *)(&selector->v2.baCSourceID[0] + pins); readable = uac_v2v3_control_is_readable(bmControls, UAC2_CX_CLOCK_SELECTOR); writeable = 
uac_v2v3_control_is_writeable(bmControls, UAC2_CX_CLOCK_SELECTOR); if (pins == 1) { ret = 1; goto find_source; } /* for now just warn about buggy device */ if (!readable) usb_audio_warn(chip, "%s(): clock selector control is not readable, id %d\n", __func__, clock_id); /* the entity ID we are looking at is a selector. * find out what it currently selects */ ret = uac_clock_selector_get_val(chip, clock_id, fmt->iface); if (ret < 0) { if (!chip->autoclock) return ret; goto find_others; } /* Selector values are one-based */ if (ret > pins || ret < 1) { usb_audio_err(chip, "%s(): selector reported illegal value, id %d, ret %d\n", __func__, clock_id, ret); if (!chip->autoclock) return -EINVAL; goto find_others; } find_source: cur = ret; ret = __uac_clock_find_source(chip, fmt, sources[ret - 1], visited, validate); if (ret > 0) { /* Skip setting clock selector again for some devices */ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur, fmt->iface); if (err < 0) { if (pins == 1) { usb_audio_dbg(chip, "%s(): selector returned an error, " "assuming a firmware bug, id %d, ret %d\n", __func__, clock_id, err); return ret; } return err; } } if (!validate || ret > 0 || !chip->autoclock) return ret; find_others: if (!writeable) return -ENXIO; /* The current clock source is invalid, try others. 
*/ for (i = 1; i <= pins; i++) { if (i == cur) continue; ret = __uac_clock_find_source(chip, fmt, sources[i - 1], visited, true); if (ret < 0) continue; err = uac_clock_selector_set_val(chip, entity_id, i, fmt->iface); if (err < 0) continue; usb_audio_info(chip, "found and selected valid clock source %d\n", ret); return ret; } return -ENXIO; } /* FIXME: multipliers only act as pass-thru element for now */ multiplier = snd_usb_find_clock_multiplier(chip, entity_id, fmt); if (multiplier) return __uac_clock_find_source(chip, fmt, GET_VAL(multiplier, proto, bCSourceID), visited, validate); return -EINVAL; } /* * For all kinds of sample rate settings and other device queries, * the clock source (end-leaf) must be used. However, clock selectors, * clock multipliers and sample rate converters may be specified as * clock source input to terminal. This functions walks the clock path * to its end and tries to find the source. * * The 'visited' bitfield is used internally to detect recursive loops. * * Returns the clock source UnitID (>=0) on success, or an error. 
*/ int snd_usb_clock_find_source(struct snd_usb_audio *chip, const struct audioformat *fmt, bool validate) { DECLARE_BITMAP(visited, 256); memset(visited, 0, sizeof(visited)); switch (fmt->protocol) { case UAC_VERSION_2: case UAC_VERSION_3: return __uac_clock_find_source(chip, fmt, fmt->clock, visited, validate); default: return -EINVAL; } } static int set_sample_rate_v1(struct snd_usb_audio *chip, const struct audioformat *fmt, int rate) { struct usb_device *dev = chip->dev; unsigned char data[3]; int err, crate; /* if endpoint doesn't have sampling rate control, bail out */ if (!(fmt->attributes & UAC_EP_CS_ATTR_SAMPLE_RATE)) return 0; data[0] = rate; data[1] = rate >> 8; data[2] = rate >> 16; err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR, USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT, UAC_EP_CS_ATTR_SAMPLE_RATE << 8, fmt->endpoint, data, sizeof(data)); if (err < 0) { dev_err(&dev->dev, "%d:%d: cannot set freq %d to ep %#x\n", fmt->iface, fmt->altsetting, rate, fmt->endpoint); return err; } /* Don't check the sample rate for devices which we know don't * support reading */ if (chip->quirk_flags & QUIRK_FLAG_GET_SAMPLE_RATE) return 0; /* the firmware is likely buggy, don't repeat to fail too many times */ if (chip->sample_rate_read_error > 2) return 0; err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR, USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_IN, UAC_EP_CS_ATTR_SAMPLE_RATE << 8, fmt->endpoint, data, sizeof(data)); if (err < 0) { dev_err(&dev->dev, "%d:%d: cannot get freq at ep %#x\n", fmt->iface, fmt->altsetting, fmt->endpoint); chip->sample_rate_read_error++; return 0; /* some devices don't support reading */ } crate = data[0] | (data[1] << 8) | (data[2] << 16); if (!crate) { dev_info(&dev->dev, "failed to read current rate; disabling the check\n"); chip->sample_rate_read_error = 3; /* three strikes, see above */ return 0; } if (crate != rate) { dev_warn(&dev->dev, "current rate %d is different from the runtime rate %d\n", 
crate, rate); // runtime->rate = crate; } return 0; } static int get_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, int altsetting, int clock) { struct usb_device *dev = chip->dev; __le32 data; int err; struct usb_host_interface *ctrl_intf; ctrl_intf = snd_usb_find_ctrl_interface(chip, iface); err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, UAC2_CS_CONTROL_SAM_FREQ << 8, snd_usb_ctrl_intf(ctrl_intf) | (clock << 8), &data, sizeof(data)); if (err < 0) { dev_warn(&dev->dev, "%d:%d: cannot get freq (v2/v3): err %d\n", iface, altsetting, err); return 0; } return le32_to_cpu(data); } /* * Try to set the given sample rate: * * Return 0 if the clock source is read-only, the actual rate on success, * or a negative error code. * * This function gets called from format.c to validate each sample rate, too. * Hence no message is shown upon error */ int snd_usb_set_sample_rate_v2v3(struct snd_usb_audio *chip, const struct audioformat *fmt, int clock, int rate) { bool writeable; u32 bmControls; __le32 data; int err; union uac23_clock_source_desc *cs_desc; struct usb_host_interface *ctrl_intf; ctrl_intf = snd_usb_find_ctrl_interface(chip, fmt->iface); cs_desc = snd_usb_find_clock_source(chip, clock, fmt); if (!cs_desc) return 0; if (fmt->protocol == UAC_VERSION_3) bmControls = le32_to_cpu(cs_desc->v3.bmControls); else bmControls = cs_desc->v2.bmControls; writeable = uac_v2v3_control_is_writeable(bmControls, UAC2_CS_CONTROL_SAM_FREQ); if (!writeable) return 0; data = cpu_to_le32(rate); err = snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), UAC2_CS_CUR, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, UAC2_CS_CONTROL_SAM_FREQ << 8, snd_usb_ctrl_intf(ctrl_intf) | (clock << 8), &data, sizeof(data)); if (err < 0) return err; return get_sample_rate_v2v3(chip, fmt->iface, fmt->altsetting, clock); } static int set_sample_rate_v2v3(struct snd_usb_audio *chip, const struct audioformat *fmt, int rate) { 
int cur_rate, prev_rate; int clock; /* First, try to find a valid clock. This may trigger * automatic clock selection if the current clock is not * valid. */ clock = snd_usb_clock_find_source(chip, fmt, true); if (clock < 0) { /* We did not find a valid clock, but that might be * because the current sample rate does not match an * external clock source. Try again without validation * and we will do another validation after setting the * rate. */ clock = snd_usb_clock_find_source(chip, fmt, false); /* Hardcoded sample rates */ if (chip->quirk_flags & QUIRK_FLAG_IGNORE_CLOCK_SOURCE) return 0; if (clock < 0) return clock; } prev_rate = get_sample_rate_v2v3(chip, fmt->iface, fmt->altsetting, clock); if (prev_rate == rate) goto validation; cur_rate = snd_usb_set_sample_rate_v2v3(chip, fmt, clock, rate); if (cur_rate < 0) { usb_audio_err(chip, "%d:%d: cannot set freq %d (v2/v3): err %d\n", fmt->iface, fmt->altsetting, rate, cur_rate); return cur_rate; } if (!cur_rate) cur_rate = prev_rate; if (cur_rate != rate) { usb_audio_dbg(chip, "%d:%d: freq mismatch: req %d, clock runs @%d\n", fmt->iface, fmt->altsetting, rate, cur_rate); /* continue processing */ } /* FIXME - TEAC devices require the immediate interface setup */ if (USB_ID_VENDOR(chip->usb_id) == 0x0644) { bool cur_base_48k = (rate % 48000 == 0); bool prev_base_48k = (prev_rate % 48000 == 0); if (cur_base_48k != prev_base_48k) { usb_set_interface(chip->dev, fmt->iface, fmt->altsetting); if (chip->quirk_flags & QUIRK_FLAG_IFACE_DELAY) msleep(50); } } validation: /* validate clock after rate change */ if (!uac_clock_source_is_valid(chip, fmt, clock)) return -ENXIO; return 0; } int snd_usb_init_sample_rate(struct snd_usb_audio *chip, const struct audioformat *fmt, int rate) { usb_audio_dbg(chip, "%d:%d Set sample rate %d, clock %d\n", fmt->iface, fmt->altsetting, rate, fmt->clock); switch (fmt->protocol) { case UAC_VERSION_1: default: return set_sample_rate_v1(chip, fmt, rate); case UAC_VERSION_3: if 
(chip->badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) { if (rate != UAC3_BADD_SAMPLING_RATE) return -ENXIO; else return 0; } fallthrough; case UAC_VERSION_2: return set_sample_rate_v2v3(chip, fmt, rate); } }
28 26 27 27 26 17 17 16 22 22 22 16 3 17 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix * Copyright (C) 2006 Andrey Volkov, Varma Electronics * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/can-ml.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/gpio/consumer.h> #include <linux/of.h> static void can_update_state_error_stats(struct net_device *dev, enum can_state new_state) { struct can_priv *priv = netdev_priv(dev); if (new_state <= priv->state) return; switch (new_state) { case CAN_STATE_ERROR_WARNING: priv->can_stats.error_warning++; break; case CAN_STATE_ERROR_PASSIVE: priv->can_stats.error_passive++; break; case CAN_STATE_BUS_OFF: priv->can_stats.bus_off++; break; default: break; } } static int can_tx_state_to_frame(struct net_device *dev, enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: return CAN_ERR_CRTL_ACTIVE; case CAN_STATE_ERROR_WARNING: return CAN_ERR_CRTL_TX_WARNING; case CAN_STATE_ERROR_PASSIVE: return CAN_ERR_CRTL_TX_PASSIVE; default: return 0; } } static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: return CAN_ERR_CRTL_ACTIVE; case CAN_STATE_ERROR_WARNING: return CAN_ERR_CRTL_RX_WARNING; case CAN_STATE_ERROR_PASSIVE: return CAN_ERR_CRTL_RX_PASSIVE; default: return 0; } } const char *can_get_state_str(const enum can_state state) { switch (state) { 
case CAN_STATE_ERROR_ACTIVE: return "Error Active"; case CAN_STATE_ERROR_WARNING: return "Error Warning"; case CAN_STATE_ERROR_PASSIVE: return "Error Passive"; case CAN_STATE_BUS_OFF: return "Bus Off"; case CAN_STATE_STOPPED: return "Stopped"; case CAN_STATE_SLEEPING: return "Sleeping"; default: return "<unknown>"; } } EXPORT_SYMBOL_GPL(can_get_state_str); static enum can_state can_state_err_to_state(u16 err) { if (err < CAN_ERROR_WARNING_THRESHOLD) return CAN_STATE_ERROR_ACTIVE; if (err < CAN_ERROR_PASSIVE_THRESHOLD) return CAN_STATE_ERROR_WARNING; if (err < CAN_BUS_OFF_THRESHOLD) return CAN_STATE_ERROR_PASSIVE; return CAN_STATE_BUS_OFF; } void can_state_get_by_berr_counter(const struct net_device *dev, const struct can_berr_counter *bec, enum can_state *tx_state, enum can_state *rx_state) { *tx_state = can_state_err_to_state(bec->txerr); *rx_state = can_state_err_to_state(bec->rxerr); } EXPORT_SYMBOL_GPL(can_state_get_by_berr_counter); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) { struct can_priv *priv = netdev_priv(dev); enum can_state new_state = max(tx_state, rx_state); if (unlikely(new_state == priv->state)) { netdev_warn(dev, "%s: oops, state did not change", __func__); return; } netdev_dbg(dev, "Controller changed from %s State (%d) into %s State (%d).\n", can_get_state_str(priv->state), priv->state, can_get_state_str(new_state), new_state); can_update_state_error_stats(dev, new_state); priv->state = new_state; if (!cf) return; if (unlikely(new_state == CAN_STATE_BUS_OFF)) { cf->can_id |= CAN_ERR_BUSOFF; return; } cf->can_id |= CAN_ERR_CRTL; cf->data[1] |= tx_state >= rx_state ? can_tx_state_to_frame(dev, tx_state) : 0; cf->data[1] |= tx_state <= rx_state ? 
can_rx_state_to_frame(dev, rx_state) : 0; } EXPORT_SYMBOL_GPL(can_change_state); /* CAN device restart for bus-off recovery */ static int can_restart(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct sk_buff *skb; struct can_frame *cf; int err; if (!priv->do_set_mode) return -EOPNOTSUPP; if (netif_carrier_ok(dev)) netdev_err(dev, "Attempt to restart for bus-off recovery, but carrier is OK?\n"); /* No synchronization needed because the device is bus-off and * no messages can come in or go out. */ can_flush_echo_skb(dev); /* send restart message upstream */ skb = alloc_can_err_skb(dev, &cf); if (skb) { cf->can_id |= CAN_ERR_RESTARTED; netif_rx(skb); } /* Now restart the device */ netif_carrier_on(dev); err = priv->do_set_mode(dev, CAN_MODE_START); if (err) { netdev_err(dev, "Restart failed, error %pe\n", ERR_PTR(err)); netif_carrier_off(dev); return err; } else { netdev_dbg(dev, "Restarted\n"); priv->can_stats.restarts++; } return 0; } static void can_restart_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct can_priv *priv = container_of(dwork, struct can_priv, restart_work); can_restart(priv->dev); } int can_restart_now(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); /* A manual restart is only permitted if automatic restart is * disabled and the device is in the bus-off state */ if (priv->restart_ms) return -EINVAL; if (priv->state != CAN_STATE_BUS_OFF) return -EBUSY; cancel_delayed_work_sync(&priv->restart_work); return can_restart(dev); } /* CAN bus-off * * This functions should be called when the device goes bus-off to * tell the netif layer that no more packets can be sent or received. * If enabled, a timer is started to trigger bus-off recovery. 
*/ void can_bus_off(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); if (priv->restart_ms) netdev_info(dev, "bus-off, scheduling restart in %d ms\n", priv->restart_ms); else netdev_info(dev, "bus-off\n"); netif_carrier_off(dev); if (priv->restart_ms) schedule_delayed_work(&priv->restart_work, msecs_to_jiffies(priv->restart_ms)); } EXPORT_SYMBOL_GPL(can_bus_off); void can_setup(struct net_device *dev) { dev->type = ARPHRD_CAN; dev->mtu = CAN_MTU; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; /* New-style flags. */ dev->flags = IFF_NOARP; dev->features = NETIF_F_HW_CSUM; } /* Allocate and setup space for the CAN network device */ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs) { struct can_ml_priv *can_ml; struct net_device *dev; struct can_priv *priv; int size; /* We put the driver's priv, the CAN mid layer priv and the * echo skb into the netdevice's priv. The memory layout for * the netdev_priv is like this: * * +-------------------------+ * | driver's priv | * +-------------------------+ * | struct can_ml_priv | * +-------------------------+ * | array of struct sk_buff | * +-------------------------+ */ size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof(struct can_ml_priv); if (echo_skb_max) size = ALIGN(size, sizeof(struct sk_buff *)) + echo_skb_max * sizeof(struct sk_buff *); dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup, txqs, rxqs); if (!dev) return NULL; priv = netdev_priv(dev); priv->dev = dev; can_ml = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN); can_set_ml_priv(dev, can_ml); if (echo_skb_max) { priv->echo_skb_max = echo_skb_max; priv->echo_skb = (void *)priv + (size - echo_skb_max * sizeof(struct sk_buff *)); } priv->state = CAN_STATE_STOPPED; INIT_DELAYED_WORK(&priv->restart_work, can_restart_work); return dev; } EXPORT_SYMBOL_GPL(alloc_candev_mqs); /* Free space of the CAN network device */ void free_candev(struct 
net_device *dev) { free_netdev(dev); } EXPORT_SYMBOL_GPL(free_candev); /* changing MTU and control mode for CAN/CANFD devices */ int can_change_mtu(struct net_device *dev, int new_mtu) { struct can_priv *priv = netdev_priv(dev); u32 ctrlmode_static = can_get_static_ctrlmode(priv); /* Do not allow changing the MTU while running */ if (dev->flags & IFF_UP) return -EBUSY; /* allow change of MTU according to the CANFD ability of the device */ switch (new_mtu) { case CAN_MTU: /* 'CANFD-only' controllers can not switch to CAN_MTU */ if (ctrlmode_static & CAN_CTRLMODE_FD) return -EINVAL; priv->ctrlmode &= ~CAN_CTRLMODE_FD; break; case CANFD_MTU: /* check for potential CANFD ability */ if (!(priv->ctrlmode_supported & CAN_CTRLMODE_FD) && !(ctrlmode_static & CAN_CTRLMODE_FD)) return -EINVAL; priv->ctrlmode |= CAN_CTRLMODE_FD; break; default: return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL_GPL(can_change_mtu); /* generic implementation of netdev_ops::ndo_eth_ioctl for CAN devices * supporting hardware timestamps */ int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd) { struct hwtstamp_config hwts_cfg = { 0 }; switch (cmd) { case SIOCSHWTSTAMP: /* set */ if (copy_from_user(&hwts_cfg, ifr->ifr_data, sizeof(hwts_cfg))) return -EFAULT; if (hwts_cfg.tx_type == HWTSTAMP_TX_ON && hwts_cfg.rx_filter == HWTSTAMP_FILTER_ALL) return 0; return -ERANGE; case SIOCGHWTSTAMP: /* get */ hwts_cfg.tx_type = HWTSTAMP_TX_ON; hwts_cfg.rx_filter = HWTSTAMP_FILTER_ALL; if (copy_to_user(ifr->ifr_data, &hwts_cfg, sizeof(hwts_cfg))) return -EFAULT; return 0; default: return -EOPNOTSUPP; } } EXPORT_SYMBOL(can_eth_ioctl_hwts); /* generic implementation of ethtool_ops::get_ts_info for CAN devices * supporting hardware timestamps */ int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | 
SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_ON); info->rx_filters = BIT(HWTSTAMP_FILTER_ALL); return 0; } EXPORT_SYMBOL(can_ethtool_op_get_ts_info_hwts); /* Common open function when the device gets opened. * * This function should be called in the open function of the device * driver. */ int open_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); if (!priv->bittiming.bitrate) { netdev_err(dev, "bit-timing not yet defined\n"); return -EINVAL; } /* For CAN FD the data bitrate has to be >= the arbitration bitrate */ if ((priv->ctrlmode & CAN_CTRLMODE_FD) && (!priv->fd.data_bittiming.bitrate || priv->fd.data_bittiming.bitrate < priv->bittiming.bitrate)) { netdev_err(dev, "incorrect/missing data bit-timing\n"); return -EINVAL; } /* Switch carrier on if device was stopped while in bus-off state */ if (!netif_carrier_ok(dev)) netif_carrier_on(dev); return 0; } EXPORT_SYMBOL_GPL(open_candev); #ifdef CONFIG_OF /* Common function that can be used to understand the limitation of * a transceiver when it provides no means to determine these limitations * at runtime. */ void of_can_transceiver(struct net_device *dev) { struct device_node *dn; struct can_priv *priv = netdev_priv(dev); struct device_node *np = dev->dev.parent->of_node; int ret; dn = of_get_child_by_name(np, "can-transceiver"); if (!dn) return; ret = of_property_read_u32(dn, "max-bitrate", &priv->bitrate_max); of_node_put(dn); if ((ret && ret != -EINVAL) || (!ret && !priv->bitrate_max)) netdev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit.\n"); } EXPORT_SYMBOL_GPL(of_can_transceiver); #endif /* Common close function for cleanup before the device gets closed. * * This function should be called in the close function of the device * driver. 
*/ void close_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); cancel_delayed_work_sync(&priv->restart_work); can_flush_echo_skb(dev); } EXPORT_SYMBOL_GPL(close_candev); static int can_set_termination(struct net_device *ndev, u16 term) { struct can_priv *priv = netdev_priv(ndev); int set; if (term == priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED]) set = 1; else set = 0; gpiod_set_value_cansleep(priv->termination_gpio, set); return 0; } static int can_get_termination(struct net_device *ndev) { struct can_priv *priv = netdev_priv(ndev); struct device *dev = ndev->dev.parent; struct gpio_desc *gpio; u32 term; int ret; /* Disabling termination by default is the safe choice: Else if many * bus participants enable it, no communication is possible at all. */ gpio = devm_gpiod_get_optional(dev, "termination", GPIOD_OUT_LOW); if (IS_ERR(gpio)) return dev_err_probe(dev, PTR_ERR(gpio), "Cannot get termination-gpios\n"); if (!gpio) return 0; ret = device_property_read_u32(dev, "termination-ohms", &term); if (ret) { netdev_err(ndev, "Cannot get termination-ohms: %pe\n", ERR_PTR(ret)); return ret; } if (term > U16_MAX) { netdev_err(ndev, "Invalid termination-ohms value (%u > %u)\n", term, U16_MAX); return -EINVAL; } priv->termination_const_cnt = ARRAY_SIZE(priv->termination_gpio_ohms); priv->termination_const = priv->termination_gpio_ohms; priv->termination_gpio = gpio; priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_DISABLED] = CAN_TERMINATION_DISABLED; priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED] = term; priv->do_set_termination = can_set_termination; return 0; } static bool can_bittiming_const_valid(const struct can_bittiming_const *btc) { if (!btc) return true; if (!btc->sjw_max) return false; return true; } /* Register the CAN network device */ int register_candev(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); int err; /* Ensure termination_const, termination_const_cnt and * do_set_termination 
consistency. All must be either set or * unset. */ if ((!priv->termination_const != !priv->termination_const_cnt) || (!priv->termination_const != !priv->do_set_termination)) return -EINVAL; if (!priv->bitrate_const != !priv->bitrate_const_cnt) return -EINVAL; if (!priv->fd.data_bitrate_const != !priv->fd.data_bitrate_const_cnt) return -EINVAL; /* We only support either fixed bit rates or bit timing const. */ if ((priv->bitrate_const || priv->fd.data_bitrate_const) && (priv->bittiming_const || priv->fd.data_bittiming_const)) return -EINVAL; if (!can_bittiming_const_valid(priv->bittiming_const) || !can_bittiming_const_valid(priv->fd.data_bittiming_const)) return -EINVAL; if (!priv->termination_const) { err = can_get_termination(dev); if (err) return err; } dev->rtnl_link_ops = &can_link_ops; netif_carrier_off(dev); return register_netdev(dev); } EXPORT_SYMBOL_GPL(register_candev); /* Unregister the CAN network device */ void unregister_candev(struct net_device *dev) { unregister_netdev(dev); } EXPORT_SYMBOL_GPL(unregister_candev); /* Test if a network device is a candev based device * and return the can_priv* if so. */ struct can_priv *safe_candev_priv(struct net_device *dev) { if (dev->type != ARPHRD_CAN || dev->rtnl_link_ops != &can_link_ops) return NULL; return netdev_priv(dev); } EXPORT_SYMBOL_GPL(safe_candev_priv); static __init int can_dev_init(void) { int err; err = can_netlink_register(); if (!err) pr_info("CAN device driver interface\n"); return err; } module_init(can_dev_init); static __exit void can_dev_exit(void) { can_netlink_unregister(); } module_exit(can_dev_exit); MODULE_ALIAS_RTNL_LINK("can");
1 1 6 5 5 5 1 3 3 1 2 1 1 1 2 1 1 1 1 2 2 1 1 1 1 1 1 16 16 2 1 7 4 5 4 5 5 4 9 9 9 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 
506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 // SPDX-License-Identifier: GPL-2.0+ /* * Generic USB driver for report based interrupt in/out devices * like LD Didactic's USB devices. LD Didactic's USB devices are * HID devices which do not use HID report definitions (they use * raw interrupt in and out reports only for communication). * * This driver uses a ring buffer for time critical reading of * interrupt in reports and provides read and write methods for * raw interrupt reports (similar to the Windows HID driver). * Devices based on the book USB COMPLETE by Jan Axelson may need * such a compatibility to the Windows HID driver.
*
 * Copyright (C) 2005 Michael Hund <mhund@ld-didactic.de>
 *
 * Derived from Lego USB Tower driver
 * Copyright (C) 2003 David Glance <advidgsf@sourceforge.net>
 * 2001-2004 Juergen Stuber <starblue@users.sourceforge.net>
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mutex.h>

#include <linux/uaccess.h>
#include <linux/input.h>
#include <linux/usb.h>
#include <linux/poll.h>

/* Define these values to match your devices */
#define USB_VENDOR_ID_LD		0x0f11	/* USB Vendor ID of LD Didactic GmbH */
#define USB_DEVICE_ID_LD_CASSY		0x1000	/* USB Product ID of CASSY-S modules with 8 bytes endpoint size */
#define USB_DEVICE_ID_LD_CASSY2		0x1001	/* USB Product ID of CASSY-S modules with 64 bytes endpoint size */
#define USB_DEVICE_ID_LD_POCKETCASSY	0x1010	/* USB Product ID of Pocket-CASSY */
#define USB_DEVICE_ID_LD_POCKETCASSY2	0x1011	/* USB Product ID of Pocket-CASSY 2 (reserved) */
#define USB_DEVICE_ID_LD_MOBILECASSY	0x1020	/* USB Product ID of Mobile-CASSY */
#define USB_DEVICE_ID_LD_MOBILECASSY2	0x1021	/* USB Product ID of Mobile-CASSY 2 (reserved) */
#define USB_DEVICE_ID_LD_MICROCASSYVOLTAGE	0x1031	/* USB Product ID of Micro-CASSY Voltage */
#define USB_DEVICE_ID_LD_MICROCASSYCURRENT	0x1032	/* USB Product ID of Micro-CASSY Current */
#define USB_DEVICE_ID_LD_MICROCASSYTIME		0x1033	/* USB Product ID of Micro-CASSY Time (reserved) */
#define USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE	0x1035	/* USB Product ID of Micro-CASSY Temperature */
#define USB_DEVICE_ID_LD_MICROCASSYPH		0x1038	/* USB Product ID of Micro-CASSY pH */
#define USB_DEVICE_ID_LD_POWERANALYSERCASSY	0x1040	/* USB Product ID of Power Analyser CASSY */
#define USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY	0x1042	/* USB Product ID of Converter Controller CASSY */
#define USB_DEVICE_ID_LD_MACHINETESTCASSY	0x1043	/* USB Product ID of Machine Test CASSY */
#define USB_DEVICE_ID_LD_JWM		0x1080	/* USB Product ID of Joule and Wattmeter */
#define USB_DEVICE_ID_LD_DMMP		0x1081	/* USB Product ID of Digital Multimeter P (reserved) */
#define USB_DEVICE_ID_LD_UMIP		0x1090	/* USB Product ID of UMI P */
#define USB_DEVICE_ID_LD_UMIC		0x10A0	/* USB Product ID of UMI C */
#define USB_DEVICE_ID_LD_UMIB		0x10B0	/* USB Product ID of UMI B */
#define USB_DEVICE_ID_LD_XRAY		0x1100	/* USB Product ID of X-Ray Apparatus 55481 */
#define USB_DEVICE_ID_LD_XRAY2		0x1101	/* USB Product ID of X-Ray Apparatus 554800 */
#define USB_DEVICE_ID_LD_XRAYCT		0x1110	/* USB Product ID of X-Ray Apparatus CT 554821*/
#define USB_DEVICE_ID_LD_VIDEOCOM	0x1200	/* USB Product ID of VideoCom */
#define USB_DEVICE_ID_LD_MOTOR		0x1210	/* USB Product ID of Motor (reserved) */
#define USB_DEVICE_ID_LD_COM3LAB	0x2000	/* USB Product ID of COM3LAB */
#define USB_DEVICE_ID_LD_TELEPORT	0x2010	/* USB Product ID of Terminal Adapter */
#define USB_DEVICE_ID_LD_NETWORKANALYSER 0x2020	/* USB Product ID of Network Analyser */
#define USB_DEVICE_ID_LD_POWERCONTROL	0x2030	/* USB Product ID of Converter Control Unit */
#define USB_DEVICE_ID_LD_MACHINETEST	0x2040	/* USB Product ID of Machine Test System */
#define USB_DEVICE_ID_LD_MOSTANALYSER	0x2050	/* USB Product ID of MOST Protocol Analyser */
#define USB_DEVICE_ID_LD_MOSTANALYSER2	0x2051	/* USB Product ID of MOST Protocol Analyser 2 */
#define USB_DEVICE_ID_LD_ABSESP		0x2060	/* USB Product ID of ABS ESP */
#define USB_DEVICE_ID_LD_AUTODATABUS	0x2070	/* USB Product ID of Automotive Data Buses */
#define USB_DEVICE_ID_LD_MCT		0x2080	/* USB Product ID of Microcontroller technique */
#define USB_DEVICE_ID_LD_HYBRID		0x2090	/* USB Product ID of Automotive Hybrid */
#define USB_DEVICE_ID_LD_HEATCONTROL	0x20A0	/* USB Product ID of Heat control */

/*
 * First minor number handed to the USB core for this driver's device
 * nodes: 0 when minors are assigned dynamically, 176 otherwise.
 */
#ifdef CONFIG_USB_DYNAMIC_MINORS
#define USB_LD_MINOR_BASE	0
#else
#define USB_LD_MINOR_BASE	176
#endif

/*
 * table of devices that work with this driver
 *
 * NOTE(review): USB_DEVICE_ID_LD_XRAYCT is defined above but has no entry
 * in this table - confirm whether that omission is intentional.
 */
static const struct usb_device_id ld_usb_table[] = {
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CASSY2) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POCKETCASSY2) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOBILECASSY2) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYVOLTAGE) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYCURRENT) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTIME) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYTEMPERATURE) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MICROCASSYPH) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERANALYSERCASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_CONVERTERCONTROLLERCASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETESTCASSY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_JWM) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_DMMP) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIP) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIC) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_UMIB) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_XRAY2) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_VIDEOCOM) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOTOR) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_COM3LAB) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_TELEPORT) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_NETWORKANALYSER) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_POWERCONTROL) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MACHINETEST) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MOSTANALYSER2) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_ABSESP) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_AUTODATABUS) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_MCT) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HYBRID) },
	{ USB_DEVICE(USB_VENDOR_ID_LD, USB_DEVICE_ID_LD_HEATCONTROL) },
	{ }					/* Terminating entry */
};
MODULE_DEVICE_TABLE(usb, ld_usb_table);
MODULE_AUTHOR("Michael Hund <mhund@ld-didactic.de>");
MODULE_DESCRIPTION("LD USB Driver");
MODULE_LICENSE("GPL");

/* All interrupt in transfers are collected in a ring buffer to
 * avoid racing conditions and get better performance of the driver.
 *
 * The value is used as the slot count of the ring buffer and as the
 * modulus of the ring indices. It is registered with permission bits
 * 0000.
 */
static int ring_buffer_size = 128;
module_param(ring_buffer_size, int, 0000);
MODULE_PARM_DESC(ring_buffer_size, "Read ring buffer size in reports");

/* The write_buffer can contain more than one interrupt out transfer.
 *
 * The write buffer is sized as write_buffer_size reports of the out
 * endpoint size each.
 */
static int write_buffer_size = 10;
module_param(write_buffer_size, int, 0000);
MODULE_PARM_DESC(write_buffer_size, "Write buffer size in reports");

/* As of kernel version 2.6.4 ehci-hcd uses an
 * "only one interrupt transfer per frame" shortcut
 * to simplify the scheduling of periodic transfers.
 * This conflicts with our standard 1ms intervals for in and out URBs.
 * We use default intervals of 2ms for in and 2ms for out transfers,
 * which should be fast enough.
 * Increase the interval to allow more devices that do interrupt transfers,
 * or set to 1 to use the standard interval from the endpoint descriptors.
*/ static int min_interrupt_in_interval = 2; module_param(min_interrupt_in_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_in_interval, "Minimum interrupt in interval in ms"); static int min_interrupt_out_interval = 2; module_param(min_interrupt_out_interval, int, 0000); MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in ms"); /* Structure to hold all of our device specific stuff */ struct ld_usb { struct mutex mutex; /* locks this structure */ struct usb_interface *intf; /* save off the usb interface pointer */ unsigned long disconnected:1; int open_count; /* number of times this port has been opened */ char *ring_buffer; unsigned int ring_head; unsigned int ring_tail; wait_queue_head_t read_wait; wait_queue_head_t write_wait; char *interrupt_in_buffer; struct usb_endpoint_descriptor *interrupt_in_endpoint; struct urb *interrupt_in_urb; int interrupt_in_interval; size_t interrupt_in_endpoint_size; int interrupt_in_running; int interrupt_in_done; int buffer_overflow; spinlock_t rbsl; char *interrupt_out_buffer; struct usb_endpoint_descriptor *interrupt_out_endpoint; struct urb *interrupt_out_urb; int interrupt_out_interval; size_t interrupt_out_endpoint_size; int interrupt_out_busy; }; static struct usb_driver ld_usb_driver; /* * ld_usb_abort_transfers * aborts transfers and frees associated data structures */ static void ld_usb_abort_transfers(struct ld_usb *dev) { /* shutdown transfer */ if (dev->interrupt_in_running) { dev->interrupt_in_running = 0; usb_kill_urb(dev->interrupt_in_urb); } if (dev->interrupt_out_busy) usb_kill_urb(dev->interrupt_out_urb); } /* * ld_usb_delete */ static void ld_usb_delete(struct ld_usb *dev) { /* free data structures */ usb_free_urb(dev->interrupt_in_urb); usb_free_urb(dev->interrupt_out_urb); kfree(dev->ring_buffer); kfree(dev->interrupt_in_buffer); kfree(dev->interrupt_out_buffer); kfree(dev); } /* * ld_usb_interrupt_in_callback */ static void ld_usb_interrupt_in_callback(struct urb *urb) { 
struct ld_usb *dev = urb->context; size_t *actual_buffer; unsigned int next_ring_head; int status = urb->status; unsigned long flags; int retval; if (status) { if (status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN) { goto exit; } else { dev_dbg(&dev->intf->dev, "%s: nonzero status received: %d\n", __func__, status); spin_lock_irqsave(&dev->rbsl, flags); goto resubmit; /* maybe we can recover */ } } spin_lock_irqsave(&dev->rbsl, flags); if (urb->actual_length > 0) { next_ring_head = (dev->ring_head+1) % ring_buffer_size; if (next_ring_head != dev->ring_tail) { actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_head * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); /* actual_buffer gets urb->actual_length + interrupt_in_buffer */ *actual_buffer = urb->actual_length; memcpy(actual_buffer+1, dev->interrupt_in_buffer, urb->actual_length); dev->ring_head = next_ring_head; dev_dbg(&dev->intf->dev, "%s: received %d bytes\n", __func__, urb->actual_length); } else { dev_warn(&dev->intf->dev, "Ring buffer overflow, %d bytes dropped\n", urb->actual_length); dev->buffer_overflow = 1; } } resubmit: /* resubmit if we're still running */ if (dev->interrupt_in_running && !dev->buffer_overflow) { retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC); if (retval) { dev_err(&dev->intf->dev, "usb_submit_urb failed (%d)\n", retval); dev->buffer_overflow = 1; } } spin_unlock_irqrestore(&dev->rbsl, flags); exit: dev->interrupt_in_done = 1; wake_up_interruptible(&dev->read_wait); } /* * ld_usb_interrupt_out_callback */ static void ld_usb_interrupt_out_callback(struct urb *urb) { struct ld_usb *dev = urb->context; int status = urb->status; /* sync/async unlink faults aren't errors */ if (status && !(status == -ENOENT || status == -ECONNRESET || status == -ESHUTDOWN)) dev_dbg(&dev->intf->dev, "%s - nonzero write interrupt status received: %d\n", __func__, status); dev->interrupt_out_busy = 0; wake_up_interruptible(&dev->write_wait); } /* * ld_usb_open */ static 
int ld_usb_open(struct inode *inode, struct file *file) { struct ld_usb *dev; int subminor; int retval; struct usb_interface *interface; stream_open(inode, file); subminor = iminor(inode); interface = usb_find_interface(&ld_usb_driver, subminor); if (!interface) { printk(KERN_ERR "%s - error, can't find device for minor %d\n", __func__, subminor); return -ENODEV; } dev = usb_get_intfdata(interface); if (!dev) return -ENODEV; /* lock this device */ if (mutex_lock_interruptible(&dev->mutex)) return -ERESTARTSYS; /* allow opening only once */ if (dev->open_count) { retval = -EBUSY; goto unlock_exit; } dev->open_count = 1; /* initialize in direction */ dev->ring_head = 0; dev->ring_tail = 0; dev->buffer_overflow = 0; usb_fill_int_urb(dev->interrupt_in_urb, interface_to_usbdev(interface), usb_rcvintpipe(interface_to_usbdev(interface), dev->interrupt_in_endpoint->bEndpointAddress), dev->interrupt_in_buffer, dev->interrupt_in_endpoint_size, ld_usb_interrupt_in_callback, dev, dev->interrupt_in_interval); dev->interrupt_in_running = 1; dev->interrupt_in_done = 0; retval = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (retval) { dev_err(&interface->dev, "Couldn't submit interrupt_in_urb %d\n", retval); dev->interrupt_in_running = 0; dev->open_count = 0; goto unlock_exit; } /* save device in the file's private structure */ file->private_data = dev; unlock_exit: mutex_unlock(&dev->mutex); return retval; } /* * ld_usb_release */ static int ld_usb_release(struct inode *inode, struct file *file) { struct ld_usb *dev; int retval = 0; dev = file->private_data; if (dev == NULL) { retval = -ENODEV; goto exit; } mutex_lock(&dev->mutex); if (dev->open_count != 1) { retval = -ENODEV; goto unlock_exit; } if (dev->disconnected) { /* the device was unplugged before the file was released */ mutex_unlock(&dev->mutex); /* unlock here as ld_usb_delete frees dev */ ld_usb_delete(dev); goto exit; } /* wait until write transfer is finished */ if (dev->interrupt_out_busy) 
wait_event_interruptible_timeout(dev->write_wait, !dev->interrupt_out_busy, 2 * HZ); ld_usb_abort_transfers(dev); dev->open_count = 0; unlock_exit: mutex_unlock(&dev->mutex); exit: return retval; } /* * ld_usb_poll */ static __poll_t ld_usb_poll(struct file *file, poll_table *wait) { struct ld_usb *dev; __poll_t mask = 0; dev = file->private_data; if (dev->disconnected) return EPOLLERR | EPOLLHUP; poll_wait(file, &dev->read_wait, wait); poll_wait(file, &dev->write_wait, wait); if (dev->ring_head != dev->ring_tail) mask |= EPOLLIN | EPOLLRDNORM; if (!dev->interrupt_out_busy) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } /* * ld_usb_read */ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct ld_usb *dev; size_t *actual_buffer; size_t bytes_to_read; int retval = 0; int rv; dev = file->private_data; /* verify that we actually have some data to read */ if (count == 0) goto exit; /* lock this object */ if (mutex_lock_interruptible(&dev->mutex)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; } /* wait for data */ spin_lock_irq(&dev->rbsl); while (dev->ring_head == dev->ring_tail) { dev->interrupt_in_done = 0; spin_unlock_irq(&dev->rbsl); if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done); if (retval < 0) goto unlock_exit; spin_lock_irq(&dev->rbsl); } spin_unlock_irq(&dev->rbsl); /* actual_buffer contains actual_length + interrupt_in_buffer */ actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); if (*actual_buffer > dev->interrupt_in_endpoint_size) { retval = -EIO; goto unlock_exit; } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) dev_warn(&dev->intf->dev, "Read buffer 
overflow, %zu bytes dropped\n", *actual_buffer-bytes_to_read); /* copy one interrupt_in_buffer from ring_buffer into userspace */ if (copy_to_user(buffer, actual_buffer+1, bytes_to_read)) { retval = -EFAULT; goto unlock_exit; } retval = bytes_to_read; spin_lock_irq(&dev->rbsl); dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size; if (dev->buffer_overflow) { dev->buffer_overflow = 0; spin_unlock_irq(&dev->rbsl); rv = usb_submit_urb(dev->interrupt_in_urb, GFP_KERNEL); if (rv < 0) dev->buffer_overflow = 1; } else { spin_unlock_irq(&dev->rbsl); } unlock_exit: /* unlock the device */ mutex_unlock(&dev->mutex); exit: return retval; } /* * ld_usb_write */ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct ld_usb *dev; size_t bytes_to_write; int retval = 0; dev = file->private_data; /* verify that we actually have some data to write */ if (count == 0) goto exit; /* lock this object */ if (mutex_lock_interruptible(&dev->mutex)) { retval = -ERESTARTSYS; goto exit; } /* verify that the device wasn't unplugged */ if (dev->disconnected) { retval = -ENODEV; printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval); goto unlock_exit; } /* wait until previous transfer is finished */ if (dev->interrupt_out_busy) { if (file->f_flags & O_NONBLOCK) { retval = -EAGAIN; goto unlock_exit; } retval = wait_event_interruptible(dev->write_wait, !dev->interrupt_out_busy); if (retval < 0) { goto unlock_exit; } } /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n", count - bytes_to_write); dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { retval = -EFAULT; goto unlock_exit; } if (dev->interrupt_out_endpoint == 
NULL) { /* try HID_REQ_SET_REPORT=9 on control_endpoint instead of interrupt_out_endpoint */ retval = usb_control_msg(interface_to_usbdev(dev->intf), usb_sndctrlpipe(interface_to_usbdev(dev->intf), 0), 9, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, 1 << 8, 0, dev->interrupt_out_buffer, bytes_to_write, USB_CTRL_SET_TIMEOUT); if (retval < 0) dev_err(&dev->intf->dev, "Couldn't submit HID_REQ_SET_REPORT %d\n", retval); goto unlock_exit; } /* send off the urb */ usb_fill_int_urb(dev->interrupt_out_urb, interface_to_usbdev(dev->intf), usb_sndintpipe(interface_to_usbdev(dev->intf), dev->interrupt_out_endpoint->bEndpointAddress), dev->interrupt_out_buffer, bytes_to_write, ld_usb_interrupt_out_callback, dev, dev->interrupt_out_interval); dev->interrupt_out_busy = 1; wmb(); retval = usb_submit_urb(dev->interrupt_out_urb, GFP_KERNEL); if (retval) { dev->interrupt_out_busy = 0; dev_err(&dev->intf->dev, "Couldn't submit interrupt_out_urb %d\n", retval); goto unlock_exit; } retval = bytes_to_write; unlock_exit: /* unlock the device */ mutex_unlock(&dev->mutex); exit: return retval; } /* file operations needed when we register this driver */ static const struct file_operations ld_usb_fops = { .owner = THIS_MODULE, .read = ld_usb_read, .write = ld_usb_write, .open = ld_usb_open, .release = ld_usb_release, .poll = ld_usb_poll, }; /* * usb class driver info in order to get a minor number from the usb core, * and to have the device registered with the driver core */ static struct usb_class_driver ld_usb_class = { .name = "ldusb%d", .fops = &ld_usb_fops, .minor_base = USB_LD_MINOR_BASE, }; /* * ld_usb_probe * * Called by the usb core when a new device is connected that it thinks * this driver might be interested in. 
*/
static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(intf);
	struct ld_usb *dev = NULL;
	struct usb_host_interface *iface_desc;
	char *buffer;
	int retval = -ENOMEM;
	int res;

	/*
	 * ring_buffer_size is the modulus of the ring indices in the
	 * interrupt in callback and in read(). A value of zero would pass
	 * the allocation below and then divide by zero in the URB
	 * completion handler, so reject non-positive values up front.
	 */
	if (ring_buffer_size <= 0) {
		dev_err(&intf->dev, "Invalid ring_buffer_size %d\n",
			ring_buffer_size);
		return -EINVAL;
	}

	/* allocate memory for our device state and initialize it */

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		goto exit;
	mutex_init(&dev->mutex);
	spin_lock_init(&dev->rbsl);
	dev->intf = intf;
	init_waitqueue_head(&dev->read_wait);
	init_waitqueue_head(&dev->write_wait);

	/* workaround for early firmware versions on fast computers */
	if ((le16_to_cpu(udev->descriptor.idVendor) == USB_VENDOR_ID_LD) &&
	    ((le16_to_cpu(udev->descriptor.idProduct) == USB_DEVICE_ID_LD_CASSY) ||
	     (le16_to_cpu(udev->descriptor.idProduct) == USB_DEVICE_ID_LD_COM3LAB)) &&
	    (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x103)) {
		buffer = kmalloc(256, GFP_KERNEL);
		if (!buffer)
			goto error;
		/* usb_string makes SETUP+STALL to leave always ControlReadLoop */
		/* the result is deliberately ignored; only the request matters */
		usb_string(udev, 255, buffer, 256);
		kfree(buffer);
	}

	iface_desc = intf->cur_altsetting;

	/* an interrupt in endpoint is mandatory */
	res = usb_find_last_int_in_endpoint(iface_desc,
			&dev->interrupt_in_endpoint);
	if (res) {
		dev_err(&intf->dev, "Interrupt in endpoint not found\n");
		retval = res;
		goto error;
	}

	/* an interrupt out endpoint is optional; writes fall back to ep0 */
	res = usb_find_last_int_out_endpoint(iface_desc,
			&dev->interrupt_out_endpoint);
	if (res)
		dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n");

	dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint);
	/* each ring slot holds a size_t length plus one full report */
	dev->ring_buffer = kcalloc(ring_buffer_size,
			sizeof(size_t) + dev->interrupt_in_endpoint_size,
			GFP_KERNEL);
	if (!dev->ring_buffer)
		goto error;
	dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL);
	if (!dev->interrupt_in_buffer)
		goto error;
	dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL);
	if (!dev->interrupt_in_urb)
		goto error;
	dev->interrupt_out_endpoint_size = dev->interrupt_out_endpoint ? usb_endpoint_maxp(dev->interrupt_out_endpoint) :
									 udev->descriptor.bMaxPacketSize0;
	dev->interrupt_out_buffer =
		kmalloc_array(write_buffer_size,
			      dev->interrupt_out_endpoint_size, GFP_KERNEL);
	if (!dev->interrupt_out_buffer)
		goto error;
	dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL);
	if (!dev->interrupt_out_urb)
		goto error;
	/* never poll faster than the module parameters allow */
	dev->interrupt_in_interval = max_t(int, min_interrupt_in_interval,
					   dev->interrupt_in_endpoint->bInterval);
	if (dev->interrupt_out_endpoint)
		dev->interrupt_out_interval = max_t(int, min_interrupt_out_interval,
						    dev->interrupt_out_endpoint->bInterval);

	/* we can register the device now, as it is ready */
	usb_set_intfdata(intf, dev);

	retval = usb_register_dev(intf, &ld_usb_class);
	if (retval) {
		/* something prevented us from registering this driver */
		dev_err(&intf->dev, "Not able to get a minor for this device.\n");
		usb_set_intfdata(intf, NULL);
		goto error;
	}

	/* let the user know what node this device is now attached to */
	dev_info(&intf->dev, "LD USB Device #%d now attached to major %d minor %d\n",
		(intf->minor - USB_LD_MINOR_BASE), USB_MAJOR, intf->minor);

exit:
	return retval;

error:
	/* ld_usb_delete() copes with members that were never allocated */
	ld_usb_delete(dev);

	return retval;
}

/*
 *	ld_usb_disconnect
 *
 *	Called by the usb core when the device is removed from the system.
*/ static void ld_usb_disconnect(struct usb_interface *intf) { struct ld_usb *dev; int minor; dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); minor = intf->minor; /* give back our minor */ usb_deregister_dev(intf, &ld_usb_class); usb_poison_urb(dev->interrupt_in_urb); usb_poison_urb(dev->interrupt_out_urb); mutex_lock(&dev->mutex); /* if the device is not opened, then we clean up right now */ if (!dev->open_count) { mutex_unlock(&dev->mutex); ld_usb_delete(dev); } else { dev->disconnected = 1; /* wake up pollers */ wake_up_interruptible_all(&dev->read_wait); wake_up_interruptible_all(&dev->write_wait); mutex_unlock(&dev->mutex); } dev_info(&intf->dev, "LD USB Device #%d now disconnected\n", (minor - USB_LD_MINOR_BASE)); } /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver ld_usb_driver = { .name = "ldusb", .probe = ld_usb_probe, .disconnect = ld_usb_disconnect, .id_table = ld_usb_table, }; module_usb_driver(ld_usb_driver);
21 21 21 21 21 18 21 2 19 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 // SPDX-License-Identifier: GPL-2.0-or-later /* * Randomness driver for virtio * Copyright (C) 2007, 2008 Rusty Russell IBM Corporation */ #include <asm/barrier.h> #include <linux/err.h> #include <linux/hw_random.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> #include <linux/virtio.h> #include <linux/virtio_rng.h> #include <linux/module.h> #include <linux/slab.h> static DEFINE_IDA(rng_index_ida); struct virtrng_info { struct hwrng hwrng; struct virtqueue *vq; char name[25]; int index; bool hwrng_register_done; bool hwrng_removed; /* data transfer */ struct completion have_data; unsigned int data_avail; unsigned int data_idx; /* minimal size returned by rng_buffer_size() */ #if SMP_CACHE_BYTES < 32 u8 data[32]; #else u8 data[SMP_CACHE_BYTES]; #endif }; static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; unsigned int len; /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. 
*/ if (!virtqueue_get_buf(vi->vq, &len)) return; smp_store_release(&vi->data_avail, len); complete(&vi->have_data); } static void request_entropy(struct virtrng_info *vi) { struct scatterlist sg; reinit_completion(&vi->have_data); vi->data_idx = 0; sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); virtqueue_kick(vi->vq); } static unsigned int copy_data(struct virtrng_info *vi, void *buf, unsigned int size) { size = min_t(unsigned int, size, vi->data_avail); memcpy(buf, vi->data + vi->data_idx, size); vi->data_idx += size; vi->data_avail -= size; if (vi->data_avail == 0) request_entropy(vi); return size; } static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; unsigned int chunk; size_t read; if (vi->hwrng_removed) return -ENODEV; read = 0; /* copy available data */ if (smp_load_acquire(&vi->data_avail)) { chunk = copy_data(vi, buf, size); size -= chunk; read += chunk; } if (!wait) return read; /* We have already copied available entropy, * so either size is 0 or data_avail is 0 */ while (size != 0) { /* data_avail is 0 but a request is pending */ ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; /* if vi->data_avail is 0, we have been interrupted * by a cleanup, but buffer stays in the queue */ if (vi->data_avail == 0) return read; chunk = copy_data(vi, buf + read, size); size -= chunk; read += chunk; } return read; } static void virtio_cleanup(struct hwrng *rng) { struct virtrng_info *vi = (struct virtrng_info *)rng->priv; complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) { int err, index; struct virtrng_info *vi = NULL; vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL); if (!vi) return -ENOMEM; vi->index = index = ida_alloc(&rng_index_ida, GFP_KERNEL); if (index < 0) { err = index; goto err_ida; } 
sprintf(vi->name, "virtio_rng.%d", index); init_completion(&vi->have_data); vi->hwrng = (struct hwrng) { .read = virtio_read, .cleanup = virtio_cleanup, .priv = (unsigned long)vi, .name = vi->name, }; vdev->priv = vi; /* We expect a single virtqueue. */ vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input"); if (IS_ERR(vi->vq)) { err = PTR_ERR(vi->vq); goto err_find; } virtio_device_ready(vdev); /* we always have a pending entropy request */ request_entropy(vi); return 0; err_find: ida_free(&rng_index_ida, index); err_ida: kfree(vi); return err; } static void remove_common(struct virtio_device *vdev) { struct virtrng_info *vi = vdev->priv; vi->hwrng_removed = true; vi->data_avail = 0; vi->data_idx = 0; complete(&vi->have_data); if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); virtio_reset_device(vdev); vdev->config->del_vqs(vdev); ida_free(&rng_index_ida, vi->index); kfree(vi); } static int virtrng_probe(struct virtio_device *vdev) { return probe_common(vdev); } static void virtrng_remove(struct virtio_device *vdev) { remove_common(vdev); } static void virtrng_scan(struct virtio_device *vdev) { struct virtrng_info *vi = vdev->priv; int err; err = hwrng_register(&vi->hwrng); if (!err) vi->hwrng_register_done = true; } static int virtrng_freeze(struct virtio_device *vdev) { remove_common(vdev); return 0; } static int virtrng_restore(struct virtio_device *vdev) { int err; err = probe_common(vdev); if (!err) { struct virtrng_info *vi = vdev->priv; /* * Set hwrng_removed to ensure that virtio_read() * does not block waiting for data before the * registration is complete. 
*/ vi->hwrng_removed = true; err = hwrng_register(&vi->hwrng); if (!err) { vi->hwrng_register_done = true; vi->hwrng_removed = false; } } return err; } static const struct virtio_device_id id_table[] = { { VIRTIO_ID_RNG, VIRTIO_DEV_ANY_ID }, { 0 }, }; static struct virtio_driver virtio_rng_driver = { .driver.name = KBUILD_MODNAME, .id_table = id_table, .probe = virtrng_probe, .remove = virtrng_remove, .scan = virtrng_scan, .freeze = pm_sleep_ptr(virtrng_freeze), .restore = pm_sleep_ptr(virtrng_restore), }; module_virtio_driver(virtio_rng_driver); MODULE_DEVICE_TABLE(virtio, id_table); MODULE_DESCRIPTION("Virtio random number driver"); MODULE_LICENSE("GPL");
616 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_ICMPV6_H #define _LINUX_ICMPV6_H #include <linux/skbuff.h> #include <linux/ipv6.h> #include <uapi/linux/icmpv6.h> static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) { return (struct icmp6hdr *)skb_transport_header(skb); } #include <linux/netdevice.h> #if IS_ENABLED(CONFIG_IPV6) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm); #if IS_BUILTIN(CONFIG_IPV6) static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct inet6_skb_parm *parm) { icmp6_send(skb, type, code, info, NULL, parm); } static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { BUILD_BUG_ON(fn != icmp6_send); return 0; } static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) { BUILD_BUG_ON(fn != icmp6_send); return 0; } #else extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct inet6_skb_parm *parm); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); #endif static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { __icmpv6_send(skb, type, code, info, IP6CB(skb)); } int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); #if IS_ENABLED(CONFIG_NF_NAT) void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); #else 
static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { struct inet6_skb_parm parm = { 0 }; __icmpv6_send(skb_in, type, code, info, &parm); } #endif #else static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { } static inline void icmpv6_ndo_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { } #endif extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); extern void icmpv6_cleanup(void); extern void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, enum skb_drop_reason reason); struct flowi6; struct in6_addr; void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif); static inline void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_param_prob_reason(skb, code, pos, SKB_DROP_REASON_NOT_SPECIFIED); } static inline bool icmpv6_is_err(int type) { switch (type) { case ICMPV6_DEST_UNREACH: case ICMPV6_PKT_TOOBIG: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: return true; } return false; } #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_PROCESSOR_H #define _ASM_X86_PROCESSOR_H #include <asm/processor-flags.h> /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct io_bitmap; struct vm86; #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> #include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeatures.h> #include <asm/cpuid/api.h> #include <asm/page.h> #include <asm/pgtable_types.h> #include <asm/percpu.h> #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> #include <asm/fpu/types.h> #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> #include <asm/vdso/processor.h> #include <asm/shstk.h> #include <linux/personality.h> #include <linux/cache.h> #include <linux/threads.h> #include <linux/math64.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/mem_encrypt.h> /* * We handle most unaligned accesses in hardware. 
On the other hand * unaligned DMA can be quite expensive on some Nehalem processors. * * Based on this we disable the IP header alignment in network drivers. */ #define NET_IP_ALIGN 0 #define HBP_NUM 4 /* * These alignment constraints are for performance in the vSMP case, * but in the task_struct case we must also meet hardware imposed * alignment requirements of the FPU state: */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else # define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif extern u16 __read_mostly tlb_lli_4k; extern u16 __read_mostly tlb_lli_2m; extern u16 __read_mostly tlb_lli_4m; extern u16 __read_mostly tlb_lld_4k; extern u16 __read_mostly tlb_lld_2m; extern u16 __read_mostly tlb_lld_4m; extern u16 __read_mostly tlb_lld_1g; /* * CPU type and hardware bug flags. Kept separately for each CPU. */ struct cpuinfo_topology { // Real APIC ID read from the local APIC u32 apicid; // The initial APIC ID provided by CPUID u32 initial_apicid; // Physical package ID u32 pkg_id; // Physical die ID on AMD, Relative on Intel u32 die_id; // Compute unit ID - AMD specific u32 cu_id; // Core ID relative to the package u32 core_id; // Logical ID mappings u32 logical_pkg_id; u32 logical_die_id; u32 logical_core_id; // AMD Node ID and Nodes per Package info u32 amd_node_id; // Cache level topology IDs u32 llc_id; u32 l2c_id; // Hardware defined CPU-type union { u32 cpu_type; struct { // CPUID.1A.EAX[23-0] u32 intel_native_model_id :24; // CPUID.1A.EAX[31-24] u32 intel_type :8; }; struct { // CPUID 0x80000026.EBX u32 amd_num_processors :16, amd_power_eff_ranking :8, amd_native_model_id :4, amd_type :4; }; }; }; struct cpuinfo_x86 { union { /* * The particular ordering (low-to-high) of (vendor, * family, model) is done in case range of models, like * it is usually done on AMD, need to be compared. 
*/ struct { __u8 x86_model; /* CPU family */ __u8 x86; /* CPU vendor */ __u8 x86_vendor; __u8 x86_reserved; }; /* combined vendor, family, model */ __u32 x86_vfm; }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; #endif #ifdef CONFIG_X86_VMX_FEATURE_NAMES __u32 vmx_capability[NVMXINTS]; #endif __u8 x86_virt_bits; __u8 x86_phys_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ int cpuid_level; /* * Align to size of unsigned long because the x86_capability array * is passed to bitops which require the alignment. Use unnamed * union to enforce the array is aligned to size of unsigned long. */ union { __u32 x86_capability[NCAPINTS + NBUGINTS]; unsigned long x86_capability_alignment; }; char x86_vendor_id[16]; char x86_model_id[64]; struct cpuinfo_topology topo; /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; /* Index into per_cpu list: */ u16 cpu_index; /* Is SMT active on this core? 
*/ bool smt_active; u32 microcode; /* Address space bits used by the cache internally */ u8 x86_cache_bits; unsigned initialized : 1; } __randomize_layout; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 #define X86_VENDOR_AMD 2 #define X86_VENDOR_UMC 3 #define X86_VENDOR_CENTAUR 5 #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 #define X86_VENDOR_ZHAOXIN 10 #define X86_VENDOR_VORTEX 11 #define X86_VENDOR_NUM 12 #define X86_VENDOR_UNKNOWN 0xff /* * capabilities of CPUs */ extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); static inline unsigned long long l1tf_pfn_limit(void) { return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT); } void init_cpu_devs(void); void get_cpu_vendor(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_secondary_cpu(unsigned int cpu); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); /* * Friendlier CR3 helpers. */ static inline unsigned long read_cr3_pa(void) { return __read_cr3() & CR3_ADDR_MASK; } static inline unsigned long native_read_cr3_pa(void) { return __native_read_cr3() & CR3_ADDR_MASK; } static inline void load_cr3(pgd_t *pgdir) { write_cr3(__sme_pa(pgdir)); } /* * Note that while the legacy 'TSS' name comes from 'Task State Segment', * on modern x86 CPUs the TSS also holds information important to 64-bit mode, * unrelated to the task-switch mechanism: */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. 
*/ struct x86_hw_tss { unsigned short back_link, __blh; unsigned long sp0; unsigned short ss0, __ss0h; unsigned long sp1; /* * We don't use ring 1, so ss1 is a convenient scratch space in * the same cacheline as sp0. We use ss1 to cache the value in * MSR_IA32_SYSENTER_CS. When we context switch * MSR_IA32_SYSENTER_CS, we first check if the new value being * written matches ss1, and, if it's not, then we wrmsr the new * value and update ss1. * * The only reason we context switch MSR_IA32_SYSENTER_CS is * that we set it to zero in vm86 tasks to avoid corrupting the * stack if we were to go through the sysenter path from vm86 * mode. */ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ unsigned short __ss1h; unsigned long sp2; unsigned short ss2, __ss2h; unsigned long __cr3; unsigned long ip; unsigned long flags; unsigned long ax; unsigned long cx; unsigned long dx; unsigned long bx; unsigned long sp; unsigned long bp; unsigned long si; unsigned long di; unsigned short es, __esh; unsigned short cs, __csh; unsigned short ss, __ssh; unsigned short ds, __dsh; unsigned short fs, __fsh; unsigned short gs, __gsh; unsigned short ldt, __ldth; unsigned short trace; unsigned short io_bitmap_base; } __attribute__((packed)); #else struct x86_hw_tss { u32 reserved1; u64 sp0; u64 sp1; /* * Since Linux does not use ring 2, the 'sp2' slot is unused by * hardware. entry_SYSCALL_64 uses it as scratch space to stash * the user RSP value. 
*/ u64 sp2; u64 reserved2; u64 ist[7]; u32 reserved3; u32 reserved4; u16 reserved5; u16 io_bitmap_base; } __attribute__((packed)); #endif /* * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) #define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) #define IO_BITMAP_OFFSET_VALID_ALL \ (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) #ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ # define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) #else # define __KERNEL_TSS_LIMIT \ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) #endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { char stack[PAGE_SIZE]; }; struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); /* * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { /* The sequence number of the last active bitmap. */ u64 prev_sequence; /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io * bitmap user will just set x86_tss.io_bitmap_base to a value * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ unsigned int prev_max; /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; /* * Special I/O bitmap to emulate IOPL(3). 
All bytes zero, * except the additional byte at the end. */ unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { /* * The fixed hardware portion. This must not cross a page boundary * at risk of violating the SDM's advice and potentially triggering * errata. */ struct x86_hw_tss x86_tss; struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); #ifdef CONFIG_X86_64 DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse); #else DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr); #endif DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack); /* const-qualified alias provided by the linker. */ DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override, const_cpu_current_top_of_stack); #ifdef CONFIG_X86_64 static inline unsigned long cpu_kernelmode_gs_base(int cpu) { #ifdef CONFIG_SMP return per_cpu_offset(cpu); #else return 0; #endif } extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); #endif /* X86_64 */ struct perf_event; struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #ifdef CONFIG_X86_32 unsigned long sp0; #endif unsigned long sp; #ifdef CONFIG_X86_32 unsigned long sysenter_cs; #else unsigned short es; unsigned short ds; unsigned short fsindex; unsigned short gsindex; #endif #ifdef CONFIG_X86_64 unsigned long fsbase; unsigned long gsbase; #else /* * XXX: this could presumably be unsigned short. Alternatively, * 32-bit kernels could be taught to use fsindex instead. 
*/ unsigned long fs; unsigned long gs; #endif /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ unsigned long virtual_dr6; /* Keep track of the exact dr7 value set by the user */ unsigned long ptrace_dr7; /* Fault info: */ unsigned long cr2; unsigned long trap_nr; unsigned long error_code; #ifdef CONFIG_VM86 /* Virtual 86 mode info */ struct vm86 *vm86; #endif /* IO permissions: */ struct io_bitmap *io_bitmap; /* * IOPL. Privilege level dependent I/O permission which is * emulated via the I/O bitmap to prevent user space from disabling * interrupts. */ unsigned long iopl_emul; unsigned int iopl_warn:1; /* * Protection Keys Register for Userspace. Loaded immediately on * context switch. Store it in thread_struct to avoid a lookup in * the tasks's FPU xstate buffer. This value is only valid when a * task is scheduled out. For 'current' the authoritative source of * PKRU is the hardware itself. */ u32 pkru; #ifdef CONFIG_X86_USER_SHADOW_STACK unsigned long features; unsigned long features_locked; struct thread_shstk shstk; #endif }; #ifdef CONFIG_X86_DEBUG_FPU extern struct fpu *x86_task_fpu(struct task_struct *task); #else # define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task)))) #endif extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size); static inline void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size) { fpu_thread_struct_whitelist(offset, size); } static inline void native_load_sp0(unsigned long sp0) { this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static __always_inline void native_swapgs(void) { #ifdef CONFIG_X86_64 asm volatile("swapgs" ::: "memory"); #endif } static __always_inline unsigned long current_top_of_stack(void) { /* * We can't read directly from tss.sp0: sp0 on x86_32 is special in * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. 
*/ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT)) return this_cpu_read_const(const_cpu_current_top_of_stack); return this_cpu_read_stable(cpu_current_top_of_stack); } static __always_inline bool on_thread_stack(void) { return (unsigned long)(current_top_of_stack() - current_stack_pointer) < THREAD_SIZE; } #ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else static inline void load_sp0(unsigned long sp0) { native_load_sp0(sp0); } #endif /* CONFIG_PARAVIRT_XXL */ unsigned long __get_wchan(struct task_struct *p); extern void select_idle_routine(void); extern void amd_e400_c1e_apic_setup(void); extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void cpu_init(void); extern void cpu_init_exception_handling(bool boot_cpu); extern void cpu_init_replace_early_idt(void); extern void cr4_init(void); extern void set_task_blockstep(struct task_struct *task, bool on); /* Boot loader type from the setup header: */ extern int bootloader_type; extern int bootloader_version; extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else # define BASE_PREFETCH "prefetcht0 %1" #endif /* * Prefetch instructions for Pentium III (+) and AMD Athlon (+) * * It's not worth to care about 3dnow prefetches for the K6 * because they are microcoded there and very slow. */ static inline void prefetch(const void *x) { alternative_input(BASE_PREFETCH, "prefetchnta %1", X86_FEATURE_XMM, "m" (*(const char *)x)); } /* * 3dnow prefetch to get an exclusive cache line. 
* Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %1", X86_FEATURE_3DNOWPREFETCH, "m" (*(const char *)x)); } #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) #define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) #define task_pt_regs(task) \ ({ \ unsigned long __ptr = (unsigned long)task_stack_page(task); \ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ ((struct pt_regs *)__ptr) - 1; \ }) #ifdef CONFIG_X86_32 #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ } #else extern unsigned long __top_init_kernel_stack[]; #define INIT_THREAD { \ .sp = (unsigned long)&__top_init_kernel_stack, \ } #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); /* * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ #define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3)) #define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) #define KSTK_EIP(task) (task_pt_regs(task)->ip) #define KSTK_ESP(task) (task_pt_regs(task)->sp) /* Get/set a process' ability to use the timestamp counter instruction */ #define GET_TSC_CTL(adr) get_tsc_mode((adr)) #define SET_TSC_CTL(val) set_tsc_mode((val)) extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); static inline u32 per_cpu_llc_id(unsigned int cpu) { return per_cpu(cpu_info.topo.llc_id, cpu); } static inline u32 per_cpu_l2c_id(unsigned int cpu) { return per_cpu(cpu_info.topo.l2c_id, cpu); } #ifdef CONFIG_CPU_SUP_AMD /* * Issue a DIV 0/1 insn to clear any division data from previous DIV * operations. 
*/ static __always_inline void amd_clear_divider(void) { asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) :: "a" (0), "d" (0), "r" (1)); } extern void amd_check_microcode(void); #else static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif extern unsigned long arch_align_stack(unsigned long sp); void free_init_pages(const char *what, unsigned long begin, unsigned long end); extern void free_kernel_image_pages(const char *what, void *begin, void *end); void default_idle(void); #ifdef CONFIG_XEN bool xen_set_default_idle(void); #else #define xen_set_default_idle 0 #endif void __noreturn stop_this_cpu(void *dummy); void microcode_check(struct cpuinfo_x86 *prev_info); void store_cpu_caps(struct cpuinfo_x86 *info); enum l1tf_mitigations { L1TF_MITIGATION_OFF, L1TF_MITIGATION_AUTO, L1TF_MITIGATION_FLUSH_NOWARN, L1TF_MITIGATION_FLUSH, L1TF_MITIGATION_FLUSH_NOSMT, L1TF_MITIGATION_FULL, L1TF_MITIGATION_FULL_FORCE }; extern enum l1tf_mitigations l1tf_mitigation; enum mds_mitigations { MDS_MITIGATION_OFF, MDS_MITIGATION_AUTO, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV, }; extern bool gds_ucode_mitigated(void); /* * Make previous memory operations globally visible before * a WRMSR. * * MFENCE makes writes visible, but only affects load/store * instructions. WRMSR is unfortunately not a load/store * instruction and is unaffected by MFENCE. The LFENCE ensures * that the WRMSR is not reordered. * * Most WRMSRs are full serializing instructions themselves and * do not require this barrier. This is only required for the * IA32_TSC_DEADLINE and X2APIC MSRs. */ static inline void weak_wrmsr_fence(void) { alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE)); } #endif /* _ASM_X86_PROCESSOR_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 /* SPDX-License-Identifier: GPL-2.0-or-later * * Copyright (C) 2005 David Brownell */ #ifndef __LINUX_SPI_H #define __LINUX_SPI_H #include <linux/acpi.h> #include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> #include <linux/gpio/consumer.h> #include <linux/kthread.h> #include <linux/mod_devicetable.h> #include <linux/overflow.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> #include 
<uapi/linux/spi/spi.h>

/* Max no. of CS supported per spi device */
#define SPI_CS_CNT_MAX 24

struct dma_chan;
struct software_node;
struct ptp_system_timestamp;
struct spi_controller;
struct spi_transfer;
struct spi_controller_mem_ops;
struct spi_controller_mem_caps;
struct spi_message;
struct spi_offload;
struct spi_offload_config;

/*
 * INTERFACES between SPI controller-side drivers and SPI target protocol handlers,
 * and SPI infrastructure.
 */
extern const struct bus_type spi_bus_type;

/**
 * struct spi_statistics - statistics for spi transfers
 * @syncp:         seqcount to protect members in this struct for per-cpu update
 *                 on 32-bit systems
 *
 * @messages:      number of spi-messages handled
 * @transfers:     number of spi_transfers handled
 * @errors:        number of errors during spi_transfer
 * @timedout:      number of timeouts during spi_transfer
 *
 * @spi_sync:      number of times spi_sync is used
 * @spi_sync_immediate:
 *                 number of times spi_sync is executed immediately
 *                 in calling context without queuing and scheduling
 * @spi_async:     number of times spi_async is used
 *
 * @bytes:         number of bytes transferred to/from device
 * @bytes_tx:      number of bytes sent to device
 * @bytes_rx:      number of bytes received from device
 *
 * @transfer_bytes_histo:
 *                 transfer bytes histogram, SPI_STATISTICS_HISTO_SIZE buckets
 *
 * @transfers_split_maxsize:
 *                 number of transfers that have been split because of
 *                 maxsize limit
 */
struct spi_statistics {
	struct u64_stats_sync	syncp;

	u64_stats_t		messages;
	u64_stats_t		transfers;
	u64_stats_t		errors;
	u64_stats_t		timedout;

	u64_stats_t		spi_sync;
	u64_stats_t		spi_sync_immediate;
	u64_stats_t		spi_async;

	u64_stats_t		bytes;
	u64_stats_t		bytes_rx;
	u64_stats_t		bytes_tx;

#define SPI_STATISTICS_HISTO_SIZE 17
	u64_stats_t	transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE];

	u64_stats_t	transfers_split_maxsize;
};

/*
 * Add @count to @field of the calling CPU's struct spi_statistics.
 * get_cpu()/put_cpu() bracket the this_cpu_ptr() lookup and the update;
 * the write itself sits inside a u64_stats_update_begin()/end() pair on
 * the structure's syncp member.
 */
#define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count)		\
	do {								\
		struct spi_statistics *__lstats;			\
		get_cpu();						\
		__lstats = this_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_add(&__lstats->field, count);			\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu();						\
	} while (0)

/*
 * Same sequence as SPI_STATISTICS_ADD_TO_FIELD(), but bumps @field by one
 * through u64_stats_inc().
 */
#define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field)		\
	do {								\
		struct spi_statistics *__lstats;			\
		get_cpu();						\
		__lstats = this_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_inc(&__lstats->field);			\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu();						\
	} while (0)

/**
 * struct spi_delay - SPI delay information
 * @value: Value for the delay
 * @unit: Unit for the delay (the SPI_DELAY_UNIT_* constants are defined
 *	alongside this member)
 */
struct spi_delay {
#define SPI_DELAY_UNIT_USECS	0
#define SPI_DELAY_UNIT_NSECS	1
#define SPI_DELAY_UNIT_SCK	2
	u16	value;
	u8	unit;
};

extern int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer);
extern int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer);
extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg,
						  struct spi_transfer *xfer);

/**
 * struct spi_device - Controller side proxy for an SPI target device
 * @dev: Driver model representation of the device.
 * @controller: SPI controller used with the device.
 * @max_speed_hz: Maximum clock rate to be used with this chip
 *	(on this board); may be changed by the device's driver.
 *	The spi_transfer.speed_hz can override this for each transfer.
 * @bits_per_word: Data transfers involve one or more words; word sizes
 *	like eight or 12 bits are common.  In-memory wordsizes are
 *	powers of two bytes (e.g. 20 bit samples use 32 bits).
 *	This may be changed by the device's driver, or left at the
 *	default (0) indicating protocol words are eight bit bytes.
 *	The spi_transfer.bits_per_word can override this for each transfer.
 * @rt: Make the pump thread real time priority.
 * @mode: The spi mode defines how data is clocked out and in.
 *	This may be changed by the device's driver.
 *	The "active low" default for chipselect mode can be overridden
 *	(by specifying SPI_CS_HIGH) as can the "MSB first" default for
 *	each word in a transfer (by specifying SPI_LSB_FIRST).
 * @irq: Negative, or the number passed to request_irq() to receive
 *	interrupts from this device.
 * @controller_state: Controller's runtime state
 * @controller_data: Board-specific definitions for controller, such as
 *	FIFO initialization parameters; from board_info.controller_data
 * @modalias: Name of the driver to use with this device, or an alias
 *	for that name.  This appears in the sysfs "modalias" attribute
 *	for driver coldplugging, and in uevents used for hotplugging
 * @driver_override: If the name of a driver is written to this attribute, then
 *	the device will bind to the named driver and only the named driver.
 *	Do not set directly, because core frees it; use driver_set_override() to
 *	set or clear it.
 * @pcpu_statistics: statistics for the spi_device
 * @word_delay: delay to be inserted between consecutive
 *	words of a transfer
 * @cs_setup: delay to be introduced by the controller after CS is asserted
 * @cs_hold: delay to be introduced by the controller before CS is deasserted
 * @cs_inactive: delay to be introduced by the controller after CS is
 *	deasserted. If @cs_change_delay is used from @spi_transfer, then the
 *	two delays will be added up.
 * @chip_select: Array of physical chipselect, spi->chip_select[i] gives
 *	the corresponding physical CS for logical CS i.
 * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array
 * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines
 *	(optional, NULL when not using a GPIO line)
 *
 * A @spi_device is used to interchange data between an SPI target device
 * (usually a discrete chip) and CPU memory.
 *
 * In @dev, the platform_data is used to hold information about this
 * device that's meaningful to the device's protocol driver, but not
 * to its controller.  One example might be an identifier for a chip
 * variant with slightly different functionality; another might be
 * information about how this particular board wires the chip's pins.
 */
struct spi_device {
	struct device		dev;
	struct spi_controller	*controller;
	u32			max_speed_hz;
	u8			bits_per_word;
	bool			rt;
#define SPI_NO_TX		BIT(31)		/* No transmit wire */
#define SPI_NO_RX		BIT(30)		/* No receive wire */
	/*
	 * TPM specification defines flow control over SPI. Client device
	 * can insert a wait state on MISO when address is transmitted by
	 * controller on MOSI. Detecting the wait state in software is only
	 * possible for full duplex controllers. For controllers that support
	 * only half-duplex, the wait state detection needs to be implemented
	 * in hardware. TPM devices would set this flag when hardware flow
	 * control is expected from SPI controller.
	 */
#define SPI_TPM_HW_FLOW		BIT(29)		/* TPM HW flow control */
	/*
	 * All bits defined above should be covered by SPI_MODE_KERNEL_MASK.
	 * The SPI_MODE_KERNEL_MASK has the SPI_MODE_USER_MASK counterpart,
	 * which is defined in 'include/uapi/linux/spi/spi.h'.
	 * The bits defined here are from bit 31 downwards, while in
	 * SPI_MODE_USER_MASK are from 0 upwards.
	 * These bits must not overlap. A static assert check should make sure of that.
	 * If adding extra bits, make sure to decrease the bit index below as well.
	 *
	 * As written, the mask selects bit 29 and every bit above it.
	 */
#define SPI_MODE_KERNEL_MASK	(~(BIT(29) - 1))
	u32			mode;
	int			irq;
	void			*controller_state;
	void			*controller_data;
	char			modalias[SPI_NAME_SIZE];
	const char		*driver_override;

	/* The statistics */
	struct spi_statistics __percpu	*pcpu_statistics;

	struct spi_delay	word_delay; /* Inter-word delay */

	/* CS delays */
	struct spi_delay	cs_setup;
	struct spi_delay	cs_hold;
	struct spi_delay	cs_inactive;

	u8			chip_select[SPI_CS_CNT_MAX];

	/*
	 * Bit mask of the chipselect(s) that the driver need to use from
	 * the chipselect array. When the controller is capable to handle
	 * multiple chip selects & memories are connected in parallel
	 * then more than one bit need to be set in cs_index_mask.
	 */
	u32			cs_index_mask : SPI_CS_CNT_MAX;

	struct gpio_desc	*cs_gpiod[SPI_CS_CNT_MAX];	/* Chip select gpio desc */

	/*
	 * Likely need more hooks for more protocol options affecting how
	 * the controller talks to each chip, like:
	 *  - memory packing (12 bit samples into low bits, others zeroed)
	 *  - priority
	 *  - chipselect delays
	 *  - ...
	 */
};

/* Make sure that SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK don't overlap */
static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0,
	      "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap");

/* Map an embedded struct device back to its enclosing struct spi_device. */
#define to_spi_device(__dev)	container_of_const(__dev, struct spi_device, dev)

/*
 * Most drivers won't need to care about device refcounting.
 *
 * spi_dev_get() returns @spi when it is non-NULL and get_device() on its
 * embedded device returns non-NULL; otherwise it returns NULL.
 */
static inline struct spi_device *spi_dev_get(struct spi_device *spi)
{
	return (spi && get_device(&spi->dev)) ? spi : NULL;
}

/* Counterpart of spi_dev_get(); a NULL @spi is ignored. */
static inline void spi_dev_put(struct spi_device *spi)
{
	if (spi)
		put_device(&spi->dev);
}

/* ctldata is for the bus_controller driver's runtime state */
static inline void *spi_get_ctldata(const struct spi_device *spi)
{
	return spi->controller_state;
}

static inline void spi_set_ctldata(struct spi_device *spi, void *state)
{
	spi->controller_state = state;
}

/* Device driver data */

static inline void spi_set_drvdata(struct spi_device *spi, void *data)
{
	dev_set_drvdata(&spi->dev, data);
}

static inline void *spi_get_drvdata(const struct spi_device *spi)
{
	return dev_get_drvdata(&spi->dev);
}

/*
 * Chip select accessors.  @idx indexes arrays of SPI_CS_CNT_MAX entries and
 * is not range-checked here, so callers must pass idx < SPI_CS_CNT_MAX.
 */
static inline u8 spi_get_chipselect(const struct spi_device *spi, u8 idx)
{
	return spi->chip_select[idx];
}

static inline void spi_set_chipselect(struct spi_device *spi, u8 idx, u8 chipselect)
{
	spi->chip_select[idx] = chipselect;
}

static inline struct gpio_desc *spi_get_csgpiod(const struct spi_device *spi, u8 idx)
{
	return spi->cs_gpiod[idx];
}

static inline void spi_set_csgpiod(struct spi_device *spi, u8 idx, struct gpio_desc *csgpiod)
{
	spi->cs_gpiod[idx] = csgpiod;
}

/*
 * Return true if any of the SPI_CS_CNT_MAX chip select slots of @spi holds a
 * non-NULL GPIO descriptor, false otherwise.
 */
static inline bool spi_is_csgpiod(struct spi_device *spi)
{
	u8 idx;

	for (idx = 0; idx < SPI_CS_CNT_MAX; idx++) {
		if (spi_get_csgpiod(spi, idx))
			return true;
	}
	return false;
}

/**
 * struct spi_driver - Host side "protocol" driver
 * @id_table: List of SPI devices supported by this driver
 * @probe: Binds this driver to the SPI device.  Drivers can verify
 *	that the device is actually present, and may need to configure
 *	characteristics (such as bits_per_word) which weren't needed for
 *	the initial configuration done during system setup.
 * @remove: Unbinds this driver from the SPI device
 * @shutdown: Standard shutdown callback used during system state
 *	transitions such as powerdown/halt and kexec
 * @driver: SPI device drivers should initialize the name and owner
 *	field of this structure.
 *
 * This represents the kind of device driver that uses SPI messages to
 * interact with the hardware at the other end of a SPI link.  It's called
 * a "protocol" driver because it works through messages rather than talking
 * directly to SPI hardware (which is what the underlying SPI controller
 * driver does to pass those messages).  These protocols are defined in the
 * specification for the device(s) supported by the driver.
 *
 * As a rule, those device protocols represent the lowest level interface
 * supported by a driver, and it will support upper level interfaces too.
 * Examples of such upper levels include frameworks like MTD, networking,
 * MMC, RTC, filesystem character device nodes, and hardware monitoring.
 */
struct spi_driver {
	const struct spi_device_id *id_table;
	int			(*probe)(struct spi_device *spi);
	void			(*remove)(struct spi_device *spi);
	void			(*shutdown)(struct spi_device *spi);
	struct device_driver	driver;
};

/*
 * Map an embedded struct device_driver back to its struct spi_driver.
 * NULL-tolerant: evaluates to NULL when @__drv is NULL.
 */
#define to_spi_driver(__drv)   \
	( __drv ? container_of_const(__drv, struct spi_driver, driver) : NULL )

extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv);

/**
 * spi_unregister_driver - reverse effect of spi_register_driver
 * @sdrv: the driver to unregister; a NULL pointer is ignored
 * Context: can sleep
 */
static inline void spi_unregister_driver(struct spi_driver *sdrv)
{
	if (sdrv)
		driver_unregister(&sdrv->driver);
}

extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select);

/* Use a define to avoid include chaining to get THIS_MODULE */
#define spi_register_driver(driver) \
	__spi_register_driver(THIS_MODULE, driver)

/**
 * module_spi_driver() - Helper macro for registering a SPI driver
 * @__spi_driver: spi_driver struct
 *
 * Helper macro for SPI drivers which do not do anything special in module
 * init/exit. This eliminates a lot of boilerplate. Each module may only
 * use this macro once, and calling it replaces module_init() and module_exit()
 */
#define module_spi_driver(__spi_driver) \
	module_driver(__spi_driver, spi_register_driver, \
			spi_unregister_driver)

/**
 * struct spi_controller - interface to SPI host or target controller
 * @dev: device interface to this driver
 * @list: link with the global spi_controller list
 * @bus_num: board-specific (and often SOC-specific) identifier for a
 *	given SPI controller.
 * @num_chipselect: chipselects are used to distinguish individual
 *	SPI targets, and are numbered from zero to num_chipselects.
 *	each target has a chipselect signal, but it's common that not
 *	every chipselect is connected to a target.
 * @dma_alignment: SPI controller constraint on DMA buffers alignment.
 * @mode_bits: flags understood by this controller driver
 * @buswidth_override_bits: flags to override for this controller driver
 * @bits_per_word_mask: A mask indicating which values of bits_per_word are
 *	supported by the driver. Bit n indicates that a bits_per_word n+1 is
 *	supported.
If set, the SPI core will reject any transfer with an * unsupported bits_per_word. If not set, this value is simply ignored, * and it's up to the individual driver to perform any validation. * @min_speed_hz: Lowest supported transfer speed * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @max_message_size: function that returns the max message size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. * @io_mutex: mutex for physical bus access * @add_lock: mutex to avoid adding devices to the same chipselect * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. * @set_cs_timing: optional hook for SPI devices to request SPI * controller for configuring specific CS setup time, hold time and inactive * delay in terms of clock counts * @transfer: adds a message to the controller's transfer queue. 
* @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA * @dma_map_dev: device which can be used for DMA mapping * @cur_rx_dma_dev: device which is currently used for RX DMA mapping * @cur_tx_dma_dev: device which is currently used for TX DMA mapping * @queued: whether this controller is providing an internal message queue * @kworker: pointer to thread struct for message pump * @pump_messages: work struct for scheduling work to the message pump * @queue_lock: spinlock to synchronise access to message queue * @queue: message queue * @cur_msg: the currently in-flight message * @cur_msg_completion: a completion for the current in-flight message * @cur_msg_incomplete: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to check if the driver has * already called spi_finalize_current_message(). * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected * @last_cs_index_mask: bit mask the last chip selects that were used * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running * @rt: whether this queue is set to run as a realtime task * @auto_runtime_pm: the core should ensure a runtime PM reference is held * while the hardware is prepared, using the parent * device for the spidev * @max_dma_len: Maximum length of a DMA transfer for the device. 
* @prepare_transfer_hardware: a message will soon arrive from the queue * so the subsystem requests the driver to prepare the transfer hardware * by issuing this call * @transfer_one_message: the subsystem calls the driver to transfer a single * message while queuing transfers that arrive in the meantime. When the * driver is finished with this message, it must call * spi_finalize_current_message() so the subsystem can issue the next * message * @unprepare_transfer_hardware: there are currently no more messages on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. * @optimize_message: optimize the message for reuse * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem * can issue the next transfer. If the transfer fails, the * driver must set the flag SPI_TRANS_FAIL_IO to * spi_transfer->error first, before calling * spi_finalize_current_transfer(). * Note: transfer_one and transfer_one_message are mutually * exclusive; when both are set, the generic subsystem does * not call your transfer_one callback. * @handle_err: the subsystem calls the driver to handle an error that occurs * in the generic implementation of transfer_one_message(). * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. 
* @get_offload: callback for controllers with offload support to get matching * offload instance. Implementations should return -ENODEV if no match is * found. * @put_offload: release the offload instance acquired by @get_offload. * @mem_caps: controller capabilities for the handling of memory operations. * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability. * QSPI based controller should fill this based on controller's capability. * @unprepare_message: undo any work done by prepare_message(). * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab * GPIO descriptors. This will fill in @cs_gpiods and SPI devices will have * the cs_gpiod assigned if a GPIO line is found for the chipselect. * @unused_native_cs: When cs_gpiods is used, spi_register_controller() will * fill in this field with the first unused native CS, to be used by SPI * controller drivers that need to drive a native CS when using GPIO CS. * @max_native_cs: When cs_gpiods is used, and this field is filled in, * spi_register_controller() will validate all native CS (including the * unused native CS) against this value. * @pcpu_statistics: statistics for the spi_controller * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices * @dummy_tx: dummy transmit buffer for full-duplex devices * @fw_translate_cs: If the boot firmware uses different numbering scheme * what Linux expects, this optional hook can be used to translate * between the two. 
* @ptp_sts_supported: If the driver sets this to true, it must provide a
 *	time snapshot in @spi_transfer->ptp_sts as close as possible to the
 *	moment in time when @spi_transfer->ptp_sts_word_pre and
 *	@spi_transfer->ptp_sts_word_post were transmitted.
 *	If the driver does not set this, the SPI core takes the snapshot as
 *	close to the driver hand-over as possible.
 * @irq_flags: Interrupt enable state during PTP system timestamping
 * @queue_empty: signal green light for opportunistically skipping the queue
 *	for spi_sync transfers.
 * @must_async: disable all fast paths in the core
 * @defer_optimize_message: set to true if controller cannot pre-optimize messages
 *	and needs to defer the optimization step until the message is actually
 *	being transferred
 *
 * Each SPI controller can communicate with one or more @spi_device
 * children.  These make a small bus, sharing MOSI, MISO and SCK signals
 * but not chip select signals.  Each device may be configured to use a
 * different clock rate, since those shared signals are ignored unless
 * the chip is selected.
 *
 * The driver for an SPI controller manages access to those devices through
 * a queue of spi_message transactions, copying data between CPU memory and
 * an SPI target device.  For each such message it queues, it calls the
 * message's completion function when the transaction completes.
 */
struct spi_controller {
	struct device	dev;

	struct list_head list;

	/*
	 * Other than negative (== assign one dynamically), bus_num is fully
	 * board-specific.  Usually that simplifies to being SoC-specific.
	 * Example: one SoC has three SPI controllers, numbered 0..2,
	 * and one board's schematics might show it using SPI-2.  Software
	 * would normally use bus_num=2 for that controller.
	 */
	s16			bus_num;

	/*
	 * Chipselects will be integral to many controllers; some others
	 * might use board-specific GPIOs.
	 */
	u16			num_chipselect;

	/*
	 * Some SPI controllers pose alignment requirements on DMAable
	 * buffers; let protocol drivers know about these requirements.
	 */
	u16			dma_alignment;

	/* spi_device.mode flags understood by this controller driver */
	u32			mode_bits;

	/* spi_device.mode flags override flags for this controller */
	u32			buswidth_override_bits;

	/* Bitmask of supported bits_per_word for transfers */
	u32			bits_per_word_mask;
	/* Bit (bits - 1) of the mask stands for a word size of "bits" */
#define SPI_BPW_MASK(bits) BIT((bits) - 1)
#define SPI_BPW_RANGE_MASK(min, max) GENMASK((max) - 1, (min) - 1)

	/* Limits on transfer speed */
	u32			min_speed_hz;
	u32			max_speed_hz;

	/* Other constraints relevant to this driver */
	u16			flags;
#define SPI_CONTROLLER_HALF_DUPLEX	BIT(0)	/* Can't do full duplex */
#define SPI_CONTROLLER_NO_RX		BIT(1)	/* Can't do buffer read */
#define SPI_CONTROLLER_NO_TX		BIT(2)	/* Can't do buffer write */
#define SPI_CONTROLLER_MUST_RX		BIT(3)	/* Requires rx */
#define SPI_CONTROLLER_MUST_TX		BIT(4)	/* Requires tx */
#define SPI_CONTROLLER_GPIO_SS		BIT(5)	/* GPIO CS must select target device */
#define SPI_CONTROLLER_SUSPENDED	BIT(6)	/* Currently suspended */
	/*
	 * The spi-controller has multi chip select capability and can
	 * assert/de-assert more than one chip select at once.
	 */
#define SPI_CONTROLLER_MULTI_CS		BIT(7)

	/* Flag indicating if the allocation of this struct is devres-managed */
	bool			devm_allocated;

	/* Both names alias the same storage; "slave" is the older spelling */
	union {
		/* Flag indicating this is an SPI slave controller */
		bool			slave;
		/* Flag indicating this is an SPI target controller */
		bool			target;
	};

	/*
	 * On some hardware the transfer / message size may be constrained;
	 * the limit may depend on device transfer settings.
	 */
	size_t (*max_transfer_size)(struct spi_device *spi);
	size_t (*max_message_size)(struct spi_device *spi);

	/* I/O mutex */
	struct mutex		io_mutex;

	/* Used to avoid adding the same CS twice */
	struct mutex		add_lock;

	/* Lock and mutex for SPI bus locking */
	spinlock_t		bus_lock_spinlock;
	struct mutex		bus_lock_mutex;

	/* Flag indicating that the SPI bus is locked for exclusive use */
	bool			bus_lock_flag;

	/*
	 * Setup mode and clock, etc (SPI driver may call many times).
	 *
	 * IMPORTANT:  this may be called when transfers to another
	 * device are active.  DO NOT UPDATE SHARED REGISTERS in ways
	 * which could break those transfers.
	 */
	int			(*setup)(struct spi_device *spi);

	/*
	 * The set_cs_timing() method is for SPI controllers that support
	 * configuring CS timing.
	 *
	 * This hook allows SPI client drivers to request SPI controllers
	 * to configure specific CS timing through spi_set_cs_timing() after
	 * spi_setup().
	 */
	int (*set_cs_timing)(struct spi_device *spi);

	/*
	 * Bidirectional bulk transfers
	 *
	 * + The transfer() method may not sleep; its main role is
	 *   just to add the message to the queue.
	 * + For now there's no remove-from-queue operation, or
	 *   any other request management
	 * + To a given spi_device, message queueing is pure FIFO
	 *
	 * + The controller's main job is to process its message queue,
	 *   selecting a chip (for controllers), then transferring data
	 * + If there are multiple spi_device children, the i/o queue
	 *   arbitration algorithm is unspecified (round robin, FIFO,
	 *   priority, reservations, preemption, etc)
	 *
	 * + Chipselect stays active during the entire message
	 *   (unless modified by spi_transfer.cs_change != 0).
	 * + The message transfers use clock and SPI mode parameters
	 *   previously established by setup() for this device
	 */
	int			(*transfer)(struct spi_device *spi,
						struct spi_message *mesg);

	/* Called on release() to free memory provided by spi_controller */
	void			(*cleanup)(struct spi_device *spi);

	/*
	 * Used to enable core support for DMA handling: if can_dma()
	 * exists and returns true then the transfer will be mapped
	 * prior to transfer_one() being called.  The driver should
	 * not modify or store xfer, and dma_tx and dma_rx must be set
	 * while the device is prepared.
	 */
	bool			(*can_dma)(struct spi_controller *ctlr,
					   struct spi_device *spi,
					   struct spi_transfer *xfer);
	struct device *dma_map_dev;
	struct device *cur_rx_dma_dev;
	struct device *cur_tx_dma_dev;

	/*
	 * These hooks are for drivers that want to use the generic
	 * controller transfer queueing mechanism.  If these are used, the
	 * transfer() function above must NOT be specified by the driver.
	 * Over time we expect SPI drivers to be phased over to this API.
	 */
	bool				queued;
	struct kthread_worker		*kworker;
	struct kthread_work		pump_messages;
	spinlock_t			queue_lock;
	struct list_head		queue;
	struct spi_message		*cur_msg;
	struct completion               cur_msg_completion;
	bool				cur_msg_incomplete;
	bool				cur_msg_need_completion;
	bool				busy;
	bool				running;
	bool				rt;
	bool				auto_runtime_pm;
	bool                            fallback;
	bool				last_cs_mode_high;
	s8				last_cs[SPI_CS_CNT_MAX];
	u32				last_cs_index_mask : SPI_CS_CNT_MAX;
	struct completion               xfer_completion;
	size_t				max_dma_len;

	int (*optimize_message)(struct spi_message *msg);
	int (*unoptimize_message)(struct spi_message *msg);
	int (*prepare_transfer_hardware)(struct spi_controller *ctlr);
	int (*transfer_one_message)(struct spi_controller *ctlr,
				    struct spi_message *mesg);
	int (*unprepare_transfer_hardware)(struct spi_controller *ctlr);
	int (*prepare_message)(struct spi_controller *ctlr,
			       struct spi_message *message);
	int (*unprepare_message)(struct spi_controller *ctlr,
				 struct spi_message *message);
	int (*target_abort)(struct spi_controller *ctlr);

	/*
	 * These hooks are for drivers that use a generic implementation
	 * of transfer_one_message() provided by the core.
	 */
	void (*set_cs)(struct spi_device *spi, bool enable);
	int (*transfer_one)(struct spi_controller *ctlr, struct spi_device *spi,
			    struct spi_transfer *transfer);
	void (*handle_err)(struct spi_controller *ctlr,
			   struct spi_message *message);

	/* Optimized handlers for SPI memory-like operations.
	 */
	const struct spi_controller_mem_ops *mem_ops;
	const struct spi_controller_mem_caps *mem_caps;

	/* An SPI or QSPI controller can set this to true if it supports SDR/DDR transfer rates */
	bool			dtr_caps;

	struct spi_offload *(*get_offload)(struct spi_device *spi,
					   const struct spi_offload_config *config);
	void (*put_offload)(struct spi_offload *offload);

	/* GPIO chip select */
	struct gpio_desc	**cs_gpiods;
	bool			use_gpio_descriptors;
	s8			unused_native_cs;
	s8			max_native_cs;

	/* Statistics */
	struct spi_statistics __percpu	*pcpu_statistics;

	/* DMA channels for use with core dmaengine helpers */
	struct dma_chan		*dma_tx;
	struct dma_chan		*dma_rx;

	/* Dummy data for full duplex devices */
	void			*dummy_rx;
	void			*dummy_tx;

	int (*fw_translate_cs)(struct spi_controller *ctlr, unsigned cs);

	/*
	 * Driver sets this field to indicate it is able to snapshot SPI
	 * transfers (needed e.g. for reading the time of POSIX clocks)
	 */
	bool			ptp_sts_supported;

	/* Interrupt enable state during PTP system timestamping */
	unsigned long		irq_flags;

	/* Flag for enabling opportunistic skipping of the queue in spi_sync */
	bool			queue_empty;
	bool			must_async;
	bool			defer_optimize_message;
};

/* Fetch the driver-private pointer stored on the controller's embedded device. */
static inline void *spi_controller_get_devdata(struct spi_controller *ctlr)
{
	return dev_get_drvdata(&ctlr->dev);
}

/* Store a driver-private pointer on the controller's embedded device. */
static inline void spi_controller_set_devdata(struct spi_controller *ctlr,
					      void *data)
{
	dev_set_drvdata(&ctlr->dev, data);
}

/*
 * Take a reference on the controller's device.  Returns @ctlr, or NULL when
 * @ctlr is NULL or get_device() itself returns NULL.
 */
static inline struct spi_controller *spi_controller_get(struct spi_controller *ctlr)
{
	if (!ctlr || !get_device(&ctlr->dev))
		return NULL;
	return ctlr;
}

/* Drop a reference on the controller's device; a NULL @ctlr is ignored. */
static inline void spi_controller_put(struct spi_controller *ctlr)
{
	if (ctlr)
		put_device(&ctlr->dev);
}

/* True only when CONFIG_SPI_SLAVE is enabled and the controller's target flag is set. */
static inline bool spi_controller_is_target(struct spi_controller *ctlr)
{
	return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target;
}

/* PM calls that need to be issued by the driver */
extern int spi_controller_suspend(struct spi_controller *ctlr);
extern int spi_controller_resume(struct spi_controller *ctlr);

/* Calls the driver makes to interact
with the message queue */ extern struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr); extern void spi_finalize_current_message(struct spi_controller *ctlr); extern void spi_finalize_current_transfer(struct spi_controller *ctlr); /* Helper calls for driver to timestamp transfer */ void spi_take_timestamp_pre(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); void spi_take_timestamp_post(struct spi_controller *ctlr, struct spi_transfer *xfer, size_t progress, bool irqs_off); /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool target); static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { return __spi_alloc_controller(dev, size, false); } static inline struct spi_controller *spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __spi_alloc_controller(dev, size, true); } struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool target); static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { return __devm_spi_alloc_controller(dev, size, false); } static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, unsigned int size) { if (!IS_ENABLED(CONFIG_SPI_SLAVE)) return NULL; return __devm_spi_alloc_controller(dev, size, true); } extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); #if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int 
index); int acpi_spi_count_resources(struct acpi_device *adev); #else static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) { return NULL; } static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index) { return ERR_PTR(-ENODEV); } static inline int acpi_spi_count_resources(struct acpi_device *adev) { return 0; } #endif /* * SPI resource management while processing a SPI message */ typedef void (*spi_res_release_t)(struct spi_controller *ctlr, struct spi_message *msg, void *res); /** * struct spi_res - SPI resource management structure * @entry: list entry * @release: release code called prior to freeing this resource * @data: extra data allocated for the specific use-case * * This is based on ideas from devres, but focused on life-cycle * management during spi_message processing. */ struct spi_res { struct list_head entry; spi_res_release_t release; unsigned long long data[]; /* Guarantee ull alignment */ }; /*---------------------------------------------------------------------------*/ /* * I/O INTERFACE between SPI controller and protocol drivers * * Protocol drivers use a queue of spi_messages, each transferring data * between the controller and memory buffers. * * The spi_messages themselves consist of a series of read+write transfer * segments. Those segments always read the same number of bits as they * write; but one or the other is easily ignored by passing a NULL buffer * pointer. (This is unlike most types of I/O API, because SPI hardware * is full duplex.) * * NOTE: Allocation of spi_transfer and spi_message memory is entirely * up to the protocol driver, which guarantees the integrity of both (as * well as the data buffers) for as long as the message is queued. 
*/

/**
 * struct spi_transfer - a read/write buffer pair
 * @tx_buf: data to be written (DMA-safe memory), or NULL
 * @rx_buf: data to be read (DMA-safe memory), or NULL
 * @tx_dma: DMA address of tx_buf, currently not for client use
 * @rx_dma: DMA address of rx_buf, currently not for client use
 * @tx_nbits: number of bits used for writing. If 0 the default
 *      (SPI_NBITS_SINGLE) is used.
 * @rx_nbits: number of bits used for reading. If 0 the default
 *      (SPI_NBITS_SINGLE) is used.
 * @len: size of rx and tx buffers (in bytes)
 * @speed_hz: Select a speed other than the device default for this
 *      transfer. If 0 the default (from @spi_device) is used.
 * @bits_per_word: select a bits_per_word other than the device default
 *      for this transfer. If 0 the default (from @spi_device) is used.
 * @dummy_data: indicates transfer is dummy bytes transfer.
 * @cs_off: performs the transfer with chipselect off.
 * @cs_change: affects chipselect after this transfer completes
 * @cs_change_delay: delay between cs deassert and assert when
 *      @cs_change is set and @spi_transfer is not the last in @spi_message
 * @delay: delay to be introduced after this transfer before
 *	(optionally) changing the chipselect status, then starting
 *	the next transfer or completing this @spi_message.
 * @word_delay: inter word delay to be introduced after each word size
 *	(set by bits_per_word) transmission.
 * @effective_speed_hz: the effective SCK-speed that was used to
 *      transfer this transfer. Set to 0 if the SPI bus driver does
 *      not support it.
 * @transfer_list: transfers are sequenced through @spi_message.transfers
 * @tx_sg_mapped: If true, the @tx_sg is mapped for DMA
 * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA
 * @tx_sg: Scatterlist for transmit, currently not for client use
 * @rx_sg: Scatterlist for receive, currently not for client use
 * @offload_flags: Flags that are only applicable to specialized SPI offload
 *	transfers. See %SPI_OFFLOAD_XFER_* in spi-offload.h.
 * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset
 *	within @tx_buf for which the SPI device is requesting that the time
 *	snapshot for this transfer begins. Upon completing the SPI transfer,
 *	this value may have changed compared to what was requested, depending
 *	on the available snapshotting resolution (DMA transfer,
 *	@ptp_sts_supported is false, etc).
 * @ptp_sts_word_post: See @ptp_sts_word_pre. The two can be equal (meaning
 *	that a single byte should be snapshotted).
 *	If the core takes care of the timestamp (if @ptp_sts_supported is false
 *	for this controller), it will set @ptp_sts_word_pre to 0, and
 *	@ptp_sts_word_post to the length of the transfer. This is done
 *	purposefully (instead of setting to spi_transfer->len - 1) to denote
 *	that a transfer-level snapshot taken from within the driver may still
 *	be of higher quality.
 * @ptp_sts: Pointer to a memory location held by the SPI target device where a
 *	PTP system timestamp structure may lie. If drivers use PIO or their
 *	hardware has some sort of assist for retrieving exact transfer timing,
 *	they can (and should) assert @ptp_sts_supported and populate this
 *	structure using the ptp_read_system_*ts helper functions.
 *	The timestamp must represent the time at which the SPI target device has
 *	processed the word, i.e. the "pre" timestamp should be taken before
 *	transmitting the "pre" word, and the "post" timestamp after receiving
 *	transmit confirmation from the controller for the "post" word.
 * @dtr_mode: true if the transfer uses double transfer rate.
 * @timestamped: true if the transfer has been timestamped
 * @error: Error status logged by SPI controller driver.
 *
 * SPI transfers always write the same number of bytes as they read.
 * Protocol drivers should always provide @rx_buf and/or @tx_buf.
 * In some cases, they may also want to provide DMA addresses for
 * the data being transferred; that may reduce overhead, when the
 * underlying driver uses DMA.
 *
 * If the transmit buffer is NULL, zeroes will be shifted out
 * while filling @rx_buf.  If the receive buffer is NULL, the data
 * shifted in will be discarded.  Only "len" bytes shift out (or in).
 * It's an error to try to shift out a partial word.  (For example, by
 * shifting out three bytes with word size of sixteen or twenty bits;
 * the former uses two bytes per word, the latter uses four bytes.)
 *
 * In-memory data values are always in native CPU byte order, translated
 * from the wire byte order (big-endian except with SPI_LSB_FIRST).  So
 * for example when bits_per_word is sixteen, buffers are 2N bytes long
 * (@len = 2N) and hold N sixteen bit words in CPU byte order.
 *
 * When the word size of the SPI transfer is not a power-of-two multiple
 * of eight bits, those in-memory words include extra bits.  In-memory
 * words are always seen by protocol drivers as right-justified, so the
 * undefined (rx) or unused (tx) bits are always the most significant bits.
 *
 * All SPI transfers start with the relevant chipselect active.  Normally
 * it stays selected until after the last transfer in a message.  Drivers
 * can affect the chipselect signal using cs_change.
 *
 * (i) If the transfer isn't the last one in the message, this flag is
 * used to make the chipselect briefly go inactive in the middle of the
 * message.  Toggling chipselect in this way may be needed to terminate
 * a chip command, letting a single spi_message perform all of a group of
 * chip transactions together.
 *
 * (ii) When the transfer is the last one in the message, the chip may
 * stay selected until the next transfer.  On multi-device SPI busses
 * with nothing blocking messages going to other devices, this is just
 * a performance hint; starting a message to another device deselects
 * this one.  But in other cases, this can be used to ensure correctness.
 * Some devices need protocol transactions to be built from a series of
 * spi_message submissions, where the content of one message is determined
 * by the results of previous messages and where the whole transaction
 * ends when the chipselect goes inactive.
 *
 * When the SPI bus can transfer in 1x, 2x or 4x mode, it gets the mode to
 * use for this transfer from @tx_nbits and @rx_nbits.  For a bidirectional
 * transfer both should be set.  The user selects the mode with
 * SPI_NBITS_SINGLE (1x), SPI_NBITS_DUAL (2x) or SPI_NBITS_QUAD (4x).
 *
 * The user may also set @dtr_mode to true to use double transfer rate mode;
 * if it is not set, single transfer rate is assumed.
 *
 * The code that submits an spi_message (and its spi_transfers)
 * to the lower layers is responsible for managing its memory.
 * Zero-initialize every field you don't set up explicitly, to
 * insulate against future API updates.  After you submit a message
 * and its transfers, ignore them until its completion callback.
 */
struct spi_transfer {
	/*
	 * It's okay if tx_buf == rx_buf (right?).
	 * For MicroWire, one buffer must be NULL.
	 * Buffers must work with dma_*map_single() calls.
	 */
	const void	*tx_buf;
	void		*rx_buf;
	unsigned	len;

	/* Bit values for the error field below */
#define SPI_TRANS_FAIL_NO_START	BIT(0)
#define SPI_TRANS_FAIL_IO	BIT(1)
	u16		error;

	bool		tx_sg_mapped;
	bool		rx_sg_mapped;

	struct sg_table tx_sg;
	struct sg_table rx_sg;
	dma_addr_t	tx_dma;
	dma_addr_t	rx_dma;

	unsigned	dummy_data:1;
	unsigned	cs_off:1;
	unsigned	cs_change:1;
	unsigned	tx_nbits:4;
	unsigned	rx_nbits:4;
	unsigned	timestamped:1;
	bool		dtr_mode;
	/* Values for tx_nbits / rx_nbits */
#define	SPI_NBITS_SINGLE	0x01 /* 1-bit transfer */
#define	SPI_NBITS_DUAL		0x02 /* 2-bit transfer */
#define	SPI_NBITS_QUAD		0x04 /* 4-bit transfer */
#define	SPI_NBITS_OCTAL	0x08 /* 8-bit transfer */
	u8		bits_per_word;
	struct spi_delay	delay;
	struct spi_delay	cs_change_delay;
	struct spi_delay	word_delay;
	u32		speed_hz;

	u32		effective_speed_hz;

	/* Use %SPI_OFFLOAD_XFER_* from spi-offload.h */
	unsigned int	offload_flags;

	unsigned int	ptp_sts_word_pre;
	unsigned int	ptp_sts_word_post;

	struct ptp_system_timestamp *ptp_sts;

	struct list_head transfer_list;
};

/**
 * struct spi_message - one multi-segment SPI transaction
 * @transfers: list of transfer segments in this transaction
 * @spi: SPI device to which the transaction is queued
 * @pre_optimized: peripheral driver pre-optimized the message
 * @optimized: the message is in the optimized state
 * @prepared: spi_prepare_message was called for this message
 * @status: zero for success, else negative errno
 * @complete: called to report transaction completions
 * @context: the argument to complete() when it's called
 * @frame_length: the total number of bytes in the message
 * @actual_length: the total number of bytes that were transferred in all
 *	successful segments
 * @queue: for use by whichever driver currently owns the message
 * @state: for use by whichever driver currently owns the message
 * @opt_state: for use by whichever driver currently owns the message
 * @resources: for resource management when the SPI message is processed
 * @offload: (optional) offload instance used by this message
 *
 * A @spi_message is used to execute an
atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" * in the sense that no other spi_message may use that SPI bus until that * sequence completes. On some systems, many such sequences can execute as * a single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to * insulate against future API updates. After you submit a message * and its transfers, ignore them until its completion callback. */ struct spi_message { struct list_head transfers; struct spi_device *spi; /* spi_optimize_message() was called for this message */ bool pre_optimized; /* __spi_optimize_message() was called for this message */ bool optimized; /* spi_prepare_message() was called for this message */ bool prepared; /* * REVISIT: we might want a flag affecting the behavior of the * last transfer ... allowing things like "read 16 bit length L" * immediately followed by "read L bytes". Basically imposing * a specific message scheduling algorithm. * * Some controller drivers (message-at-a-time queue processing) * could provide that as their default scheduling algorithm. But * others (with multi-message pipelines) could need a flag to * tell them about such special cases. */ /* Completion is reported through a callback */ int status; void (*complete)(void *context); void *context; unsigned frame_length; unsigned actual_length; /* * For optional use by whatever driver currently owns the * spi_message ... between calls to spi_async and then later * complete(), that's the spi_controller controller driver. 
*/ struct list_head queue; void *state; /* * Optional state for use by controller driver between calls to * __spi_optimize_message() and __spi_unoptimize_message(). */ void *opt_state; /* * Optional offload instance used by this message. This must be set * by the peripheral driver before calling spi_optimize_message(). */ struct spi_offload *offload; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; static inline void spi_message_init_no_memset(struct spi_message *m) { INIT_LIST_HEAD(&m->transfers); INIT_LIST_HEAD(&m->resources); } static inline void spi_message_init(struct spi_message *m) { memset(m, 0, sizeof *m); spi_message_init_no_memset(m); } static inline void spi_message_add_tail(struct spi_transfer *t, struct spi_message *m) { list_add_tail(&t->transfer_list, &m->transfers); } static inline void spi_transfer_del(struct spi_transfer *t) { list_del(&t->transfer_list); } static inline int spi_transfer_delay_exec(struct spi_transfer *t) { return spi_delay_exec(&t->delay, t); } /** * spi_message_init_with_transfers - Initialize spi_message and append transfers * @m: spi_message to be initialized * @xfers: An array of SPI transfers * @num_xfers: Number of items in the xfer array * * This function initializes the given spi_message and adds each spi_transfer in * the given array to the message. */ static inline void spi_message_init_with_transfers(struct spi_message *m, struct spi_transfer *xfers, unsigned int num_xfers) { unsigned int i; spi_message_init(m); for (i = 0; i < num_xfers; ++i) spi_message_add_tail(&xfers[i], m); } /* * It's fine to embed message and transaction structures in other data * structures so long as you don't free them while they're in use. 
*/ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags) { struct spi_message_with_transfers { struct spi_message m; struct spi_transfer t[]; } *mwt; unsigned i; mwt = kzalloc(struct_size(mwt, t, ntrans), flags); if (!mwt) return NULL; spi_message_init_no_memset(&mwt->m); for (i = 0; i < ntrans; i++) spi_message_add_tail(&mwt->t[i], &mwt->m); return &mwt->m; } static inline void spi_message_free(struct spi_message *m) { kfree(m); } extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); extern void spi_unoptimize_message(struct spi_message *msg); extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, struct spi_message *msg); extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_target_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; if (!ctlr->max_message_size) return SIZE_MAX; return ctlr->max_message_size(spi); } static inline size_t spi_max_transfer_size(struct spi_device *spi) { struct spi_controller *ctlr = spi->controller; size_t tr_max = SIZE_MAX; size_t msg_max = spi_max_message_size(spi); if (ctlr->max_transfer_size) tr_max = ctlr->max_transfer_size(spi); /* Transfer size limit must not be greater than message size limit */ return min(tr_max, msg_max); } /** * spi_is_bpw_supported - Check if bits per word is supported * @spi: SPI device * @bpw: Bits per word * * This function checks to see if the SPI controller supports @bpw. * * Returns: * True if @bpw is supported, false otherwise. 
*/
static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw)
{
	u32 bpw_mask = spi->controller->bits_per_word_mask;

	/* A word size of 8 is accepted regardless of the controller's mask. */
	if (bpw == 8)
		return true;

	/*
	 * SPI_BPW_MASK(bpw) expands to BIT(bpw - 1).  For bpw == 0 the shift
	 * count wraps around to a huge value, which is undefined behaviour,
	 * so reject 0 here together with sizes the 32-bit mask cannot
	 * describe.
	 */
	if (bpw < 1 || bpw > 32)
		return false;

	return (bpw_mask & SPI_BPW_MASK(bpw)) != 0;
}

/**
 * spi_bpw_to_bytes - Convert bits per word to bytes
 * @bpw: Bits per word
 *
 * This function converts the given @bpw to bytes. The result is always
 * power-of-two, e.g.,
 *
 *  ===============    =================
 *  Input (in bits)    Output (in bytes)
 *  ===============    =================
 *          5                   1
 *          9                   2
 *          21                  4
 *          37                  8
 *  ===============    =================
 *
 * It will return 0 for the 0 input.
 *
 * Returns:
 * Bytes for the given @bpw.
 */
static inline u32 spi_bpw_to_bytes(u32 bpw)
{
	return roundup_pow_of_two(BITS_TO_BYTES(bpw));
}

/**
 * spi_controller_xfer_timeout - Compute a suitable timeout value
 * @ctlr: SPI controller
 * @xfer: Transfer descriptor
 *
 * Compute a relevant timeout value for the given transfer. We derive the time
 * that it would take on a single data line and take twice this amount of time
 * with a minimum of 500ms to avoid false positives on loaded systems.
 *
 * Returns: Transfer timeout value in milliseconds.
*/
static inline unsigned int spi_controller_xfer_timeout(struct spi_controller *ctlr,
						       struct spi_transfer *xfer)
{
	u32 speed_khz = xfer->speed_hz / 1000;

	/*
	 * Any speed below 1 kHz, including a speed_hz of 0 (which means
	 * "use the device default"), truncates to 0 kHz and used to make
	 * the division below divide by zero.  Treat such speeds as 1 kHz so
	 * the computation stays defined.  Results for speeds of 1 kHz and
	 * above are unchanged.
	 */
	if (!speed_khz)
		speed_khz = 1;

	/* bits on the wire (len * 8), doubled, in ms; never below 500 ms */
	return max(xfer->len * 8 * 2 / speed_khz, 500U);
}

/*---------------------------------------------------------------------------*/

/* SPI transfer replacement methods which make use of spi_res */

struct spi_replaced_transfers;
typedef void (*spi_replaced_release_t)(struct spi_controller *ctlr,
				       struct spi_message *msg,
				       struct spi_replaced_transfers *res);
/**
 * struct spi_replaced_transfers - structure describing the spi_transfer
 *                                 replacements that have occurred
 *                                 so that they can get reverted
 * @release:            some extra release code to get executed prior to
 *                      releasing this structure
 * @extradata:          pointer to some extra data if requested or NULL
 * @replaced_transfers: transfers that have been replaced and which need
 *                      to get restored
 * @replaced_after:     the transfer after which the @replaced_transfers
 *                      are to get re-inserted
 * @inserted:           number of transfers inserted
 * @inserted_transfers: array of spi_transfers of array-size @inserted,
 *                      that have been replacing replaced_transfers
 *
 * Note: that @extradata will point to @inserted_transfers[@inserted]
 * if some extra allocation is requested, so alignment will be the same
 * as for spi_transfers.
*/
struct spi_replaced_transfers {
	spi_replaced_release_t release;
	void *extradata;
	struct list_head replaced_transfers;
	struct list_head *replaced_after;
	size_t inserted;
	/* Flexible array; its element count is held in @inserted */
	struct spi_transfer inserted_transfers[];
};

/*---------------------------------------------------------------------------*/

/* SPI transfer transformation methods */

extern int spi_split_transfers_maxsize(struct spi_controller *ctlr,
				       struct spi_message *msg,
				       size_t maxsize);
extern int spi_split_transfers_maxwords(struct spi_controller *ctlr,
					struct spi_message *msg,
					size_t maxwords);

/*---------------------------------------------------------------------------*/

/*
 * All these synchronous SPI transfer routines are utilities layered
 * over the core async transfer primitive.  Here, "synchronous" means
 * they will sleep uninterruptibly until the async transfer completes.
 */

extern int spi_sync(struct spi_device *spi, struct spi_message *message);
extern int spi_sync_locked(struct spi_device *spi, struct spi_message *message);
extern int spi_bus_lock(struct spi_controller *ctlr);
extern int spi_bus_unlock(struct spi_controller *ctlr);

/**
 * spi_sync_transfer - synchronous SPI data transfer
 * @spi: device with which data will be exchanged
 * @xfers: An array of spi_transfers
 * @num_xfers: Number of items in the xfer array
 * Context: can sleep
 *
 * Does a synchronous SPI data transfer of the given spi_transfer array.
 *
 * For more specific semantics see spi_sync().
 *
 * Return: zero on success, else a negative error code.
 */
static inline int
spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers,
	unsigned int num_xfers)
{
	/* The message lives on this stack frame and is handed to spi_sync() */
	struct spi_message msg;

	/* Zeroes msg and links every element of xfers[] into it, in order */
	spi_message_init_with_transfers(&msg, xfers, num_xfers);

	return spi_sync(spi, &msg);
}

/**
 * spi_write - SPI synchronous write
 * @spi: device to which data will be written
 * @buf: data buffer
 * @len: data buffer size
 * Context: can sleep
 *
 * This function writes the buffer @buf.
 * Callable only from contexts that can sleep.
* * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) { struct spi_transfer t = { .tx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /** * spi_read - SPI synchronous read * @spi: device from which data will be read * @buf: data buffer * @len: data buffer size * Context: can sleep * * This function reads the buffer @buf. * Callable only from contexts that can sleep. * * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) { struct spi_transfer t = { .rx_buf = buf, .len = len, }; return spi_sync_transfer(spi, &t, 1); } /* This copies txbuf and rxbuf data; for small transfers only! */ extern int spi_write_then_read(struct spi_device *spi, const void *txbuf, unsigned n_tx, void *rxbuf, unsigned n_rx); /** * spi_w8r8 - SPI synchronous 8 bit write followed by 8 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * Callable only from contexts that can sleep. * * Return: the (unsigned) eight bit number returned by the * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { ssize_t status; u8 result; status = spi_write_then_read(spi, &cmd, 1, &result, 1); /* Return negative errno or unsigned value */ return (status < 0) ? status : result; } /** * spi_w8r16 - SPI synchronous 8 bit write followed by 16 bit read * @spi: device with which data will be exchanged * @cmd: command to be written before data is read back * Context: can sleep * * The number is returned in wire-order, which is at least sometimes * big-endian. * * Callable only from contexts that can sleep. * * Return: the (unsigned) sixteen bit number returned by the * device, or else a negative error code. 
*/
static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd)
{
	u16			reply;
	ssize_t			rc;

	rc = spi_write_then_read(spi, &cmd, 1, &reply, 2);

	/* A negative errno wins; otherwise hand back the raw 16-bit word */
	if (rc < 0)
		return rc;

	return reply;
}

/**
 * spi_w8r16be - SPI synchronous 8 bit write followed by 16 bit big-endian read
 * @spi: device with which data will be exchanged
 * @cmd: command to be written before data is read back
 * Context: can sleep
 *
 * This function is similar to spi_w8r16, with the exception that it will
 * convert the read 16 bit data word from big-endian to native endianness.
 *
 * Callable only from contexts that can sleep.
 *
 * Return: the (unsigned) sixteen bit number returned by the device in CPU
 * endianness, or else a negative error code.
 */
static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd)

{
	__be16			wire_word;
	ssize_t			rc;

	rc = spi_write_then_read(spi, &cmd, 1, &wire_word, 2);

	/* Only byte-swap the word when the transfer actually succeeded */
	return (rc < 0) ? rc : be16_to_cpu(wire_word);
}

/*---------------------------------------------------------------------------*/

/*
 * INTERFACE between board init code and SPI infrastructure.
 *
 * No SPI driver ever sees these SPI device table segments, but
 * it's how the SPI core (or adapters that get hotplugged) grows
 * the driver model tree.
 *
 * As a rule, SPI devices can't be probed.  Instead, board init code
 * provides a table listing the devices which are present, with enough
 * information to bind and set up the device's driver.  There's basic
 * support for non-static configurations too; enough to handle adding
 * parport adapters, or microcontrollers acting as USB-to-SPI bridges.
 */

/**
 * struct spi_board_info - board-specific template for a SPI device
 * @modalias: Initializes spi_device.modalias; identifies the driver.
 * @platform_data: Initializes spi_device.platform_data; the particular
 *	data stored there is driver-specific.
 * @swnode: Software node for the device.
* @controller_data: Initializes spi_device.controller_data; some
 *	controllers need hints about hardware setup, e.g. for DMA.
 * @irq: Initializes spi_device.irq; depends on how the board is wired.
 * @max_speed_hz: Initializes spi_device.max_speed_hz; based on limits
 *	from the chip datasheet and board-specific signal quality issues.
 * @bus_num: Identifies which spi_controller parents the spi_device; unused
 *	by spi_new_device(), and otherwise depends on board wiring.
 * @chip_select: Initializes spi_device.chip_select; depends on how
 *	the board is wired.
 * @mode: Initializes spi_device.mode; based on the chip datasheet, board
 *	wiring (some devices support both 3WIRE and standard modes), and
 *	possibly presence of an inverter in the chipselect path.
 *
 * When adding new SPI devices to the device tree, these structures serve
 * as a partial device template.  They hold information which can't always
 * be determined by drivers.  Information that probe() can establish (such
 * as the default transfer wordsize) is not included here.
 *
 * These structures are used in two places.  Their primary role is to
 * be stored in tables of board-specific device descriptors, which are
 * declared early in board initialization and then used (much later) to
 * populate a controller's device tree after that controller's driver
 * initializes.  A secondary (and atypical) role is as a parameter to a
 * spi_new_device() call, which happens after those controller drivers
 * are active in some dynamic board configuration models.
 */
struct spi_board_info {
	/*
	 * The device name and module name are coupled, like platform_bus;
	 * "modalias" is normally the driver name.
	 *
	 * platform_data goes to spi_device.dev.platform_data,
	 * controller_data goes to spi_device.controller_data,
	 * IRQ is copied too.
	 */
	char		modalias[SPI_NAME_SIZE];
	const void	*platform_data;
	const struct software_node *swnode;
	void		*controller_data;
	int		irq;

	/* Slower signaling on noisy or low voltage boards */
	u32		max_speed_hz;

	/*
	 * bus_num is board specific and matches the bus_num of some
	 * spi_controller that will probably be registered later.
	 *
	 * chip_select reflects how this chip is wired to that controller;
	 * it's less than num_chipselect.
	 */
	u16		bus_num;
	u16		chip_select;

	/*
	 * mode becomes spi_device.mode, and is essential for chips
	 * where the default of SPI_CS_HIGH = 0 is wrong.
	 */
	u32		mode;

	/*
	 * ... may need additional spi_device chip config data here.
	 * avoid stuff protocol drivers can set; but include stuff
	 * needed to behave without being bound to a driver:
	 *  - quirks like clock rate mattering when not selected
	 */
};

/*
 * spi_register_board_info() takes a table of @n board info entries.
 * With CONFIG_SPI enabled only the declaration is visible here; the
 * definition lives elsewhere.  Without CONFIG_SPI the inline stub below
 * ignores its arguments and reports success (returns 0).
 */
#ifdef	CONFIG_SPI
extern int
spi_register_board_info(struct spi_board_info const *info, unsigned n);
#else
/* Board init code may ignore whether SPI is configured or not */
static inline int
spi_register_board_info(struct spi_board_info const *info, unsigned n)
	{ return 0; }
#endif

/*
 * If you're hotplugging an adapter with devices (parport, USB, etc)
 * use spi_new_device() to describe each device.  You can also call
 * spi_unregister_device() to start making that device vanish, but
 * normally that would be handled by spi_unregister_controller().
 *
 * You can also use spi_alloc_device() and spi_add_device() to use a two
 * stage registration sequence for each spi_device. This gives the caller
 * some more control over the spi_device structure before it is registered,
 * but requires that caller to initialize fields that would otherwise
 * be defined using the board info.
*/ extern struct spi_device * spi_alloc_device(struct spi_controller *ctlr); extern int spi_add_device(struct spi_device *spi); extern struct spi_device * spi_new_device(struct spi_controller *, struct spi_board_info *); extern void spi_unregister_device(struct spi_device *spi); extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); extern const void * spi_get_device_match_data(const struct spi_device *sdev); static inline bool spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } #endif /* __LINUX_SPI_H */
2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the Diolan DLN-2 USB adapter
 *
 * Copyright (c) 2014 Intel Corporation
 *
 * Derived from:
 *  i2c-diolan-u2c.c
 *  Copyright (c) 2010-2011 Ericsson AB
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/mfd/core.h>
#include <linux/mfd/dln2.h>
#include <linux/rculist.h>

/*
 * Header found at the start of every message exchanged with the adapter.
 * All fields are little-endian on the wire.
 */
struct dln2_header {
	/* total message length in bytes, this header included */
	__le16 size;
	/* command id of the request (dln2_prep_buf() stores cmd here) */
	__le16 id;
	/*
	 * for requests the sender stores its RX slot number here; on a
	 * response dln2_rx() uses the value to index the module's RX slots
	 */
	__le16 echo;
	/* module handle, one of enum dln2_handle */
	__le16 handle;
};

/*
 * Start of a response message: the common header followed by a result
 * code. _dln2_transfer() treats a result above 0x80 as a device error.
 */
struct dln2_response {
	struct dln2_header hdr;
	__le16 result;
};

#define DLN2_GENERIC_MODULE_ID		0x00
#define DLN2_GENERIC_CMD(cmd)		DLN2_CMD(cmd, DLN2_GENERIC_MODULE_ID)
#define CMD_GET_DEVICE_VER		DLN2_GENERIC_CMD(0x30)
#define CMD_GET_DEVICE_SN		DLN2_GENERIC_CMD(0x31)

/* value the CMD_GET_DEVICE_VER reply must match, see dln2_check_hw() */
#define DLN2_HW_ID			0x200
#define DLN2_USB_TIMEOUT		200	/* in ms */
/* number of RX slots (outstanding requests) tracked per module handle */
#define DLN2_MAX_RX_SLOTS		16
/* number of receive URBs, each backed by a DLN2_RX_BUF_SIZE byte buffer */
#define DLN2_MAX_URBS			16
#define DLN2_RX_BUF_SIZE		512

/* Values of the header's handle field; also index dln2_dev.mod_rx_slots */
enum dln2_handle {
	DLN2_HANDLE_EVENT = 0,		/* don't change, hardware defined */
	DLN2_HANDLE_CTRL,
	DLN2_HANDLE_GPIO,
	DLN2_HANDLE_I2C,
	DLN2_HANDLE_SPI,
	DLN2_HANDLE_ADC,
	DLN2_HANDLES
};

/*
 * Receive context used between the receive demultiplexer and the transfer
 * routine. While sending a request the transfer routine will look for a free
 * receive context and use it to wait for a response and to receive the URB and
 * thus the response data.
 */
struct dln2_rx_context {
	/* completion used to wait for a response */
	struct completion done;

	/* if non-NULL the URB contains the response */
	struct urb *urb;

	/* if true then this context is used to wait for a response */
	bool in_use;
};

/*
 * Receive contexts for a particular DLN2 module (i2c, gpio, etc.). We use the
 * handle header field to identify the module in dln2_dev.mod_rx_slots and then
 * the echo header field to index the slots field and find the receive context
 * for a particular request.
*/ struct dln2_mod_rx_slots { /* RX slots bitmap */ DECLARE_BITMAP(bmap, DLN2_MAX_RX_SLOTS); /* used to wait for a free RX slot */ wait_queue_head_t wq; /* used to wait for an RX operation to complete */ struct dln2_rx_context slots[DLN2_MAX_RX_SLOTS]; /* avoid races between alloc/free_rx_slot and dln2_rx_transfer */ spinlock_t lock; }; struct dln2_dev { struct usb_device *usb_dev; struct usb_interface *interface; u8 ep_in; u8 ep_out; struct urb *rx_urb[DLN2_MAX_URBS]; void *rx_buf[DLN2_MAX_URBS]; struct dln2_mod_rx_slots mod_rx_slots[DLN2_HANDLES]; struct list_head event_cb_list; spinlock_t event_cb_lock; bool disconnect; int active_transfers; wait_queue_head_t disconnect_wq; spinlock_t disconnect_lock; }; struct dln2_event_cb_entry { struct list_head list; u16 id; struct platform_device *pdev; dln2_event_cb_t callback; }; int dln2_register_event_cb(struct platform_device *pdev, u16 id, dln2_event_cb_t event_cb) { struct dln2_dev *dln2 = dev_get_drvdata(pdev->dev.parent); struct dln2_event_cb_entry *i, *entry; unsigned long flags; int ret = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->id = id; entry->callback = event_cb; entry->pdev = pdev; spin_lock_irqsave(&dln2->event_cb_lock, flags); list_for_each_entry(i, &dln2->event_cb_list, list) { if (i->id == id) { ret = -EBUSY; break; } } if (!ret) list_add_rcu(&entry->list, &dln2->event_cb_list); spin_unlock_irqrestore(&dln2->event_cb_lock, flags); if (ret) kfree(entry); return ret; } EXPORT_SYMBOL(dln2_register_event_cb); void dln2_unregister_event_cb(struct platform_device *pdev, u16 id) { struct dln2_dev *dln2 = dev_get_drvdata(pdev->dev.parent); struct dln2_event_cb_entry *i; unsigned long flags; bool found = false; spin_lock_irqsave(&dln2->event_cb_lock, flags); list_for_each_entry(i, &dln2->event_cb_list, list) { if (i->id == id) { list_del_rcu(&i->list); found = true; break; } } spin_unlock_irqrestore(&dln2->event_cb_lock, flags); if (found) { synchronize_rcu(); kfree(i); } 
} EXPORT_SYMBOL(dln2_unregister_event_cb); /* * Returns true if a valid transfer slot is found. In this case the URB must not * be resubmitted immediately in dln2_rx as we need the data when dln2_transfer * is woke up. It will be resubmitted there. */ static bool dln2_transfer_complete(struct dln2_dev *dln2, struct urb *urb, u16 handle, u16 rx_slot) { struct device *dev = &dln2->interface->dev; struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[handle]; struct dln2_rx_context *rxc; unsigned long flags; bool valid_slot = false; if (rx_slot >= DLN2_MAX_RX_SLOTS) goto out; rxc = &rxs->slots[rx_slot]; spin_lock_irqsave(&rxs->lock, flags); if (rxc->in_use && !rxc->urb) { rxc->urb = urb; complete(&rxc->done); valid_slot = true; } spin_unlock_irqrestore(&rxs->lock, flags); out: if (!valid_slot) dev_warn(dev, "bad/late response %d/%d\n", handle, rx_slot); return valid_slot; } static void dln2_run_event_callbacks(struct dln2_dev *dln2, u16 id, u16 echo, void *data, int len) { struct dln2_event_cb_entry *i; rcu_read_lock(); list_for_each_entry_rcu(i, &dln2->event_cb_list, list) { if (i->id == id) { i->callback(i->pdev, echo, data, len); break; } } rcu_read_unlock(); } static void dln2_rx(struct urb *urb) { struct dln2_dev *dln2 = urb->context; struct dln2_header *hdr = urb->transfer_buffer; struct device *dev = &dln2->interface->dev; u16 id, echo, handle, size; u8 *data; int len; int err; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(dev, "urb shutting down with status %d\n", urb->status); return; default: dev_dbg(dev, "nonzero urb status received %d\n", urb->status); goto out; } if (urb->actual_length < sizeof(struct dln2_header)) { dev_err(dev, "short response: %d\n", urb->actual_length); goto out; } handle = le16_to_cpu(hdr->handle); id = le16_to_cpu(hdr->id); echo = le16_to_cpu(hdr->echo); size = le16_to_cpu(hdr->size); if (size != urb->actual_length) { 
dev_err(dev, "size mismatch: handle %x cmd %x echo %x size %d actual %d\n", handle, id, echo, size, urb->actual_length); goto out; } if (handle >= DLN2_HANDLES) { dev_warn(dev, "invalid handle %d\n", handle); goto out; } data = urb->transfer_buffer + sizeof(struct dln2_header); len = urb->actual_length - sizeof(struct dln2_header); if (handle == DLN2_HANDLE_EVENT) { unsigned long flags; spin_lock_irqsave(&dln2->event_cb_lock, flags); dln2_run_event_callbacks(dln2, id, echo, data, len); spin_unlock_irqrestore(&dln2->event_cb_lock, flags); } else { /* URB will be re-submitted in _dln2_transfer (free_rx_slot) */ if (dln2_transfer_complete(dln2, urb, handle, echo)) return; } out: err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) dev_err(dev, "failed to resubmit RX URB: %d\n", err); } static void *dln2_prep_buf(u16 handle, u16 cmd, u16 echo, const void *obuf, int *obuf_len, gfp_t gfp) { int len; void *buf; struct dln2_header *hdr; len = *obuf_len + sizeof(*hdr); buf = kmalloc(len, gfp); if (!buf) return NULL; hdr = (struct dln2_header *)buf; hdr->id = cpu_to_le16(cmd); hdr->size = cpu_to_le16(len); hdr->echo = cpu_to_le16(echo); hdr->handle = cpu_to_le16(handle); memcpy(buf + sizeof(*hdr), obuf, *obuf_len); *obuf_len = len; return buf; } static int dln2_send_wait(struct dln2_dev *dln2, u16 handle, u16 cmd, u16 echo, const void *obuf, int obuf_len) { int ret = 0; int len = obuf_len; void *buf; int actual; buf = dln2_prep_buf(handle, cmd, echo, obuf, &len, GFP_KERNEL); if (!buf) return -ENOMEM; ret = usb_bulk_msg(dln2->usb_dev, usb_sndbulkpipe(dln2->usb_dev, dln2->ep_out), buf, len, &actual, DLN2_USB_TIMEOUT); kfree(buf); return ret; } static bool find_free_slot(struct dln2_dev *dln2, u16 handle, int *slot) { struct dln2_mod_rx_slots *rxs; unsigned long flags; if (dln2->disconnect) { *slot = -ENODEV; return true; } rxs = &dln2->mod_rx_slots[handle]; spin_lock_irqsave(&rxs->lock, flags); *slot = find_first_zero_bit(rxs->bmap, DLN2_MAX_RX_SLOTS); if (*slot < 
DLN2_MAX_RX_SLOTS) { struct dln2_rx_context *rxc = &rxs->slots[*slot]; set_bit(*slot, rxs->bmap); rxc->in_use = true; } spin_unlock_irqrestore(&rxs->lock, flags); return *slot < DLN2_MAX_RX_SLOTS; } static int alloc_rx_slot(struct dln2_dev *dln2, u16 handle) { int ret; int slot; /* * No need to timeout here, the wait is bounded by the timeout in * _dln2_transfer. */ ret = wait_event_interruptible(dln2->mod_rx_slots[handle].wq, find_free_slot(dln2, handle, &slot)); if (ret < 0) return ret; return slot; } static void free_rx_slot(struct dln2_dev *dln2, u16 handle, int slot) { struct dln2_mod_rx_slots *rxs; struct urb *urb = NULL; unsigned long flags; struct dln2_rx_context *rxc; rxs = &dln2->mod_rx_slots[handle]; spin_lock_irqsave(&rxs->lock, flags); clear_bit(slot, rxs->bmap); rxc = &rxs->slots[slot]; rxc->in_use = false; urb = rxc->urb; rxc->urb = NULL; reinit_completion(&rxc->done); spin_unlock_irqrestore(&rxs->lock, flags); if (urb) { int err; struct device *dev = &dln2->interface->dev; err = usb_submit_urb(urb, GFP_KERNEL); if (err < 0) dev_err(dev, "failed to resubmit RX URB: %d\n", err); } wake_up_interruptible(&rxs->wq); } static int _dln2_transfer(struct dln2_dev *dln2, u16 handle, u16 cmd, const void *obuf, unsigned obuf_len, void *ibuf, unsigned *ibuf_len) { int ret = 0; int rx_slot; struct dln2_response *rsp; struct dln2_rx_context *rxc; struct device *dev = &dln2->interface->dev; const unsigned long timeout = msecs_to_jiffies(DLN2_USB_TIMEOUT); struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[handle]; int size; spin_lock(&dln2->disconnect_lock); if (!dln2->disconnect) dln2->active_transfers++; else ret = -ENODEV; spin_unlock(&dln2->disconnect_lock); if (ret) return ret; rx_slot = alloc_rx_slot(dln2, handle); if (rx_slot < 0) { ret = rx_slot; goto out_decr; } ret = dln2_send_wait(dln2, handle, cmd, rx_slot, obuf, obuf_len); if (ret < 0) { dev_err(dev, "USB write failed: %d\n", ret); goto out_free_rx_slot; } rxc = &rxs->slots[rx_slot]; ret = 
wait_for_completion_interruptible_timeout(&rxc->done, timeout); if (ret <= 0) { if (!ret) ret = -ETIMEDOUT; goto out_free_rx_slot; } else { ret = 0; } if (dln2->disconnect) { ret = -ENODEV; goto out_free_rx_slot; } /* if we got here we know that the response header has been checked */ rsp = rxc->urb->transfer_buffer; size = le16_to_cpu(rsp->hdr.size); if (size < sizeof(*rsp)) { ret = -EPROTO; goto out_free_rx_slot; } if (le16_to_cpu(rsp->result) > 0x80) { dev_dbg(dev, "%d received response with error %d\n", handle, le16_to_cpu(rsp->result)); ret = -EREMOTEIO; goto out_free_rx_slot; } if (!ibuf) goto out_free_rx_slot; if (*ibuf_len > size - sizeof(*rsp)) *ibuf_len = size - sizeof(*rsp); memcpy(ibuf, rsp + 1, *ibuf_len); out_free_rx_slot: free_rx_slot(dln2, handle, rx_slot); out_decr: spin_lock(&dln2->disconnect_lock); dln2->active_transfers--; spin_unlock(&dln2->disconnect_lock); if (dln2->disconnect) wake_up(&dln2->disconnect_wq); return ret; } int dln2_transfer(struct platform_device *pdev, u16 cmd, const void *obuf, unsigned obuf_len, void *ibuf, unsigned *ibuf_len) { struct dln2_platform_data *dln2_pdata; struct dln2_dev *dln2; u16 handle; dln2 = dev_get_drvdata(pdev->dev.parent); dln2_pdata = dev_get_platdata(&pdev->dev); handle = dln2_pdata->handle; return _dln2_transfer(dln2, handle, cmd, obuf, obuf_len, ibuf, ibuf_len); } EXPORT_SYMBOL(dln2_transfer); static int dln2_check_hw(struct dln2_dev *dln2) { int ret; __le32 hw_type; int len = sizeof(hw_type); ret = _dln2_transfer(dln2, DLN2_HANDLE_CTRL, CMD_GET_DEVICE_VER, NULL, 0, &hw_type, &len); if (ret < 0) return ret; if (len < sizeof(hw_type)) return -EREMOTEIO; if (le32_to_cpu(hw_type) != DLN2_HW_ID) { dev_err(&dln2->interface->dev, "Device ID 0x%x not supported\n", le32_to_cpu(hw_type)); return -ENODEV; } return 0; } static int dln2_print_serialno(struct dln2_dev *dln2) { int ret; __le32 serial_no; int len = sizeof(serial_no); struct device *dev = &dln2->interface->dev; ret = _dln2_transfer(dln2, 
DLN2_HANDLE_CTRL, CMD_GET_DEVICE_SN, NULL, 0, &serial_no, &len); if (ret < 0) return ret; if (len < sizeof(serial_no)) return -EREMOTEIO; dev_info(dev, "Diolan DLN2 serial %u\n", le32_to_cpu(serial_no)); return 0; } static int dln2_hw_init(struct dln2_dev *dln2) { int ret; ret = dln2_check_hw(dln2); if (ret < 0) return ret; return dln2_print_serialno(dln2); } static void dln2_free_rx_urbs(struct dln2_dev *dln2) { int i; for (i = 0; i < DLN2_MAX_URBS; i++) { usb_free_urb(dln2->rx_urb[i]); kfree(dln2->rx_buf[i]); } } static void dln2_stop_rx_urbs(struct dln2_dev *dln2) { int i; for (i = 0; i < DLN2_MAX_URBS; i++) usb_kill_urb(dln2->rx_urb[i]); } static void dln2_free(struct dln2_dev *dln2) { dln2_free_rx_urbs(dln2); usb_put_dev(dln2->usb_dev); kfree(dln2); } static int dln2_setup_rx_urbs(struct dln2_dev *dln2, struct usb_host_interface *hostif) { int i; const int rx_max_size = DLN2_RX_BUF_SIZE; for (i = 0; i < DLN2_MAX_URBS; i++) { dln2->rx_buf[i] = kmalloc(rx_max_size, GFP_KERNEL); if (!dln2->rx_buf[i]) return -ENOMEM; dln2->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!dln2->rx_urb[i]) return -ENOMEM; usb_fill_bulk_urb(dln2->rx_urb[i], dln2->usb_dev, usb_rcvbulkpipe(dln2->usb_dev, dln2->ep_in), dln2->rx_buf[i], rx_max_size, dln2_rx, dln2); } return 0; } static int dln2_start_rx_urbs(struct dln2_dev *dln2, gfp_t gfp) { struct device *dev = &dln2->interface->dev; int ret; int i; for (i = 0; i < DLN2_MAX_URBS; i++) { ret = usb_submit_urb(dln2->rx_urb[i], gfp); if (ret < 0) { dev_err(dev, "failed to submit RX URB: %d\n", ret); return ret; } } return 0; } enum { DLN2_ACPI_MATCH_GPIO = 0, DLN2_ACPI_MATCH_I2C = 1, DLN2_ACPI_MATCH_SPI = 2, DLN2_ACPI_MATCH_ADC = 3, }; static struct dln2_platform_data dln2_pdata_gpio = { .handle = DLN2_HANDLE_GPIO, }; static struct mfd_cell_acpi_match dln2_acpi_match_gpio = { .adr = DLN2_ACPI_MATCH_GPIO, }; /* Only one I2C port seems to be supported on current hardware */ static struct dln2_platform_data dln2_pdata_i2c = { .handle = 
DLN2_HANDLE_I2C, .port = 0, }; static struct mfd_cell_acpi_match dln2_acpi_match_i2c = { .adr = DLN2_ACPI_MATCH_I2C, }; /* Only one SPI port supported */ static struct dln2_platform_data dln2_pdata_spi = { .handle = DLN2_HANDLE_SPI, .port = 0, }; static struct mfd_cell_acpi_match dln2_acpi_match_spi = { .adr = DLN2_ACPI_MATCH_SPI, }; /* Only one ADC port supported */ static struct dln2_platform_data dln2_pdata_adc = { .handle = DLN2_HANDLE_ADC, .port = 0, }; static struct mfd_cell_acpi_match dln2_acpi_match_adc = { .adr = DLN2_ACPI_MATCH_ADC, }; static const struct mfd_cell dln2_devs[] = { { .name = "dln2-gpio", .acpi_match = &dln2_acpi_match_gpio, .platform_data = &dln2_pdata_gpio, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-i2c", .acpi_match = &dln2_acpi_match_i2c, .platform_data = &dln2_pdata_i2c, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-spi", .acpi_match = &dln2_acpi_match_spi, .platform_data = &dln2_pdata_spi, .pdata_size = sizeof(struct dln2_platform_data), }, { .name = "dln2-adc", .acpi_match = &dln2_acpi_match_adc, .platform_data = &dln2_pdata_adc, .pdata_size = sizeof(struct dln2_platform_data), }, }; static void dln2_stop(struct dln2_dev *dln2) { int i, j; /* don't allow starting new transfers */ spin_lock(&dln2->disconnect_lock); dln2->disconnect = true; spin_unlock(&dln2->disconnect_lock); /* cancel in progress transfers */ for (i = 0; i < DLN2_HANDLES; i++) { struct dln2_mod_rx_slots *rxs = &dln2->mod_rx_slots[i]; unsigned long flags; spin_lock_irqsave(&rxs->lock, flags); /* cancel all response waiters */ for (j = 0; j < DLN2_MAX_RX_SLOTS; j++) { struct dln2_rx_context *rxc = &rxs->slots[j]; if (rxc->in_use) complete(&rxc->done); } spin_unlock_irqrestore(&rxs->lock, flags); } /* wait for transfers to end */ wait_event(dln2->disconnect_wq, !dln2->active_transfers); dln2_stop_rx_urbs(dln2); } static void dln2_disconnect(struct usb_interface *interface) { struct dln2_dev *dln2 = 
usb_get_intfdata(interface); dln2_stop(dln2); mfd_remove_devices(&interface->dev); dln2_free(dln2); } static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; struct usb_endpoint_descriptor *epin; struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; int i, j; if (hostif->desc.bInterfaceNumber != 0) return -ENODEV; ret = usb_find_common_endpoints(hostif, &epin, &epout, NULL, NULL); if (ret) return ret; dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; dln2->ep_out = epout->bEndpointAddress; dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); init_waitqueue_head(&dln2->disconnect_wq); for (i = 0; i < DLN2_HANDLES; i++) { init_waitqueue_head(&dln2->mod_rx_slots[i].wq); spin_lock_init(&dln2->mod_rx_slots[i].lock); for (j = 0; j < DLN2_MAX_RX_SLOTS; j++) init_completion(&dln2->mod_rx_slots[i].slots[j].done); } spin_lock_init(&dln2->event_cb_lock); spin_lock_init(&dln2->disconnect_lock); INIT_LIST_HEAD(&dln2->event_cb_list); ret = dln2_setup_rx_urbs(dln2, hostif); if (ret) goto out_free; ret = dln2_start_rx_urbs(dln2, GFP_KERNEL); if (ret) goto out_stop_rx; ret = dln2_hw_init(dln2); if (ret < 0) { dev_err(dev, "failed to initialize hardware\n"); goto out_stop_rx; } ret = mfd_add_hotplug_devices(dev, dln2_devs, ARRAY_SIZE(dln2_devs)); if (ret != 0) { dev_err(dev, "failed to add mfd devices to core\n"); goto out_stop_rx; } return 0; out_stop_rx: dln2_stop_rx_urbs(dln2); out_free: dln2_free(dln2); return ret; } static int dln2_suspend(struct usb_interface *iface, pm_message_t message) { struct dln2_dev *dln2 = usb_get_intfdata(iface); dln2_stop(dln2); return 0; } static int dln2_resume(struct usb_interface *iface) { struct dln2_dev *dln2 = usb_get_intfdata(iface); dln2->disconnect = false; 
return dln2_start_rx_urbs(dln2, GFP_NOIO); } static const struct usb_device_id dln2_table[] = { { USB_DEVICE(0xa257, 0x2013) }, { } }; MODULE_DEVICE_TABLE(usb, dln2_table); static struct usb_driver dln2_driver = { .name = "dln2", .probe = dln2_probe, .disconnect = dln2_disconnect, .id_table = dln2_table, .suspend = dln2_suspend, .resume = dln2_resume, }; module_usb_driver(dln2_driver); MODULE_AUTHOR("Octavian Purdila <octavian.purdila@intel.com>"); MODULE_DESCRIPTION("Core driver for the Diolan DLN2 interface adapter"); MODULE_LICENSE("GPL v2");
1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for SmartJoy PLUS PS2->USB adapter * * Copyright (c) 2009 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * Based of hid-pl.c and hid-gaff.c * Copyright (c) 2007, 2009 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2008 Lukasz Lubojanski <lukasz@lubojanski.info> */ /* */ /* #define DEBUG */ #include <linux/input.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #ifdef CONFIG_SMARTJOYPLUS_FF struct sjoyff_device { struct hid_report *report; }; static int hid_sjoyff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct sjoyff_device *sjoyff = data; u32 left, right; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; dev_dbg(&dev->dev, "called with 0x%08x 0x%08x\n", left, right); left = left * 0xff / 0xffff; right = (right != 0); /* on/off only */ sjoyff->report->field[0]->value[1] = right; sjoyff->report->field[0]->value[2] = left; dev_dbg(&dev->dev, "running with 0x%02x 0x%02x\n", left, right); hid_hw_request(hid, sjoyff->report, HID_REQ_SET_REPORT); return 0; } static int sjoyff_init(struct hid_device *hid) { struct sjoyff_device *sjoyff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = 
&hid->report_enum[HID_OUTPUT_REPORT].report_list; struct list_head *report_ptr = report_list; struct input_dev *dev; int error; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } list_for_each_entry(hidinput, &hid->inputs, list) { report_ptr = report_ptr->next; if (report_ptr == report_list) { hid_err(hid, "required output report is missing\n"); return -ENODEV; } report = list_entry(report_ptr, struct hid_report, list); if (report->maxfield < 1) { hid_err(hid, "no fields in the report\n"); return -ENODEV; } if (report->field[0]->report_count < 3) { hid_err(hid, "not enough values in the field\n"); return -ENODEV; } sjoyff = kzalloc(sizeof(struct sjoyff_device), GFP_KERNEL); if (!sjoyff) return -ENOMEM; dev = hidinput->input; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, sjoyff, hid_sjoyff_play); if (error) { kfree(sjoyff); return error; } sjoyff->report = report; sjoyff->report->field[0]->value[0] = 0x01; sjoyff->report->field[0]->value[1] = 0x00; sjoyff->report->field[0]->value[2] = 0x00; hid_hw_request(hid, sjoyff->report, HID_REQ_SET_REPORT); } hid_info(hid, "Force feedback for SmartJoy PLUS PS2/USB adapter\n"); return 0; } #else static inline int sjoyff_init(struct hid_device *hid) { return 0; } #endif static int sjoy_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; hdev->quirks |= id->driver_data; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } sjoyff_init(hdev); return 0; err: return ret; } static const struct hid_device_id sjoy_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_3_PRO), .driver_data = HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_DUAL_BOX_PRO), .driver_data = HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET | 
HID_QUIRK_SKIP_OUTPUT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD, USB_DEVICE_ID_SUPER_JOY_BOX_5_PRO), .driver_data = HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD), .driver_data = HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII), .driver_data = HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { } }; MODULE_DEVICE_TABLE(hid, sjoy_devices); static struct hid_driver sjoy_driver = { .name = "smartjoyplus", .id_table = sjoy_devices, .probe = sjoy_probe, }; module_hid_driver(sjoy_driver); MODULE_DESCRIPTION("Force feedback support for SmartJoy PLUS PS2->USB adapter"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jussi Kivilinna");
1 30 30 103 103 75 68 68 100 100 34 89 89 89 1 1 1 68 68 8 1 8 8 6 8 8 11 68 68 34 34 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 /* * net/tipc/server.c: TIPC server infrastructure * * Copyright (c) 2012-2013, Wind River Systems * Copyright (c) 2017-2018, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "subscr.h" #include "topsrv.h" #include "core.h" #include "socket.h" #include "addr.h" #include "msg.h" #include "bearer.h" #include <net/sock.h> #include <linux/module.h> #include <trace/events/sock.h> /* Number of messages to send before rescheduling */ #define MAX_SEND_MSG_COUNT 25 #define MAX_RECV_MSG_COUNT 25 #define CF_CONNECTED 1 #define TIPC_SERVER_NAME_LEN 32 /** * struct tipc_topsrv - TIPC server structure * @conn_idr: identifier set of connection * @idr_lock: protect the connection identifier set * @idr_in_use: amount of allocated identifier entry * @net: network namspace instance * @awork: accept work item * @rcv_wq: receive workqueue * @send_wq: send workqueue * @listener: topsrv listener socket * @name: server name */ struct tipc_topsrv { struct idr conn_idr; spinlock_t idr_lock; /* for idr list */ int idr_in_use; struct net *net; struct work_struct awork; struct workqueue_struct *rcv_wq; struct workqueue_struct *send_wq; struct socket *listener; char name[TIPC_SERVER_NAME_LEN]; }; /** * struct tipc_conn - TIPC connection structure * @kref: reference counter to connection object * @conid: connection identifier * @sock: socket handler associated with connection * @flags: 
indicates connection state * @server: pointer to connected server * @sub_list: lsit to all pertaing subscriptions * @sub_lock: lock protecting the subscription list * @rwork: receive work item * @outqueue: pointer to first outbound message in queue * @outqueue_lock: control access to the outqueue * @swork: send work item */ struct tipc_conn { struct kref kref; int conid; struct socket *sock; unsigned long flags; struct tipc_topsrv *server; struct list_head sub_list; spinlock_t sub_lock; /* for subscription list */ struct work_struct rwork; struct list_head outqueue; spinlock_t outqueue_lock; /* for outqueue */ struct work_struct swork; }; /* An entry waiting to be sent */ struct outqueue_entry { bool inactive; struct tipc_event evt; struct list_head list; }; static void tipc_conn_recv_work(struct work_struct *work); static void tipc_conn_send_work(struct work_struct *work); static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt); static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s); static bool connected(struct tipc_conn *con) { return con && test_bit(CF_CONNECTED, &con->flags); } static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); struct tipc_topsrv *s = con->server; struct outqueue_entry *e, *safe; spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); if (con->sock) sock_release(con->sock); spin_lock_bh(&con->outqueue_lock); list_for_each_entry_safe(e, safe, &con->outqueue, list) { list_del(&e->list); kfree(e); } spin_unlock_bh(&con->outqueue_lock); kfree(con); } static void conn_put(struct tipc_conn *con) { kref_put(&con->kref, tipc_conn_kref_release); } static void conn_get(struct tipc_conn *con) { kref_get(&con->kref); } static void tipc_conn_close(struct tipc_conn *con) { struct sock *sk = con->sock->sk; bool disconnect = false; write_lock_bh(&sk->sk_callback_lock); disconnect = 
test_and_clear_bit(CF_CONNECTED, &con->flags); if (disconnect) { sk->sk_user_data = NULL; tipc_conn_delete_sub(con, NULL); } write_unlock_bh(&sk->sk_callback_lock); /* Handle concurrent calls from sending and receiving threads */ if (!disconnect) return; /* Don't flush pending works, -just let them expire */ kernel_sock_shutdown(con->sock, SHUT_RDWR); conn_put(con); } static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; con = kzalloc(sizeof(*con), GFP_ATOMIC); if (!con) return ERR_PTR(-ENOMEM); kref_init(&con->kref); INIT_LIST_HEAD(&con->outqueue); INIT_LIST_HEAD(&con->sub_list); spin_lock_init(&con->outqueue_lock); spin_lock_init(&con->sub_lock); INIT_WORK(&con->swork, tipc_conn_send_work); INIT_WORK(&con->rwork, tipc_conn_recv_work); spin_lock_bh(&s->idr_lock); ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); if (ret < 0) { kfree(con); spin_unlock_bh(&s->idr_lock); return ERR_PTR(-ENOMEM); } con->conid = ret; s->idr_in_use++; set_bit(CF_CONNECTED, &con->flags); con->server = s; con->sock = sock; conn_get(con); spin_unlock_bh(&s->idr_lock); return con; } static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid) { struct tipc_conn *con; spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); if (!connected(con) || !kref_get_unless_zero(&con->kref)) con = NULL; spin_unlock_bh(&s->idr_lock); return con; } /* tipc_conn_delete_sub - delete a specific or all subscriptions * for a given subscriber */ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) { struct tipc_net *tn = tipc_net(con->server->net); struct list_head *sub_list = &con->sub_list; struct tipc_subscription *sub, *tmp; spin_lock_bh(&con->sub_lock); list_for_each_entry_safe(sub, tmp, sub_list, sub_list) { if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { tipc_sub_unsubscribe(sub); atomic_dec(&tn->subscription_count); if (s) break; } } spin_unlock_bh(&con->sub_lock); } static void 
tipc_conn_send_to_sock(struct tipc_conn *con) { struct list_head *queue = &con->outqueue; struct tipc_topsrv *srv = con->server; struct outqueue_entry *e; struct tipc_event *evt; struct msghdr msg; struct kvec iov; int count = 0; int ret; spin_lock_bh(&con->outqueue_lock); while (!list_empty(queue)) { e = list_first_entry(queue, struct outqueue_entry, list); evt = &e->evt; spin_unlock_bh(&con->outqueue_lock); if (e->inactive) tipc_conn_delete_sub(con, &evt->s); memset(&msg, 0, sizeof(msg)); msg.msg_flags = MSG_DONTWAIT; iov.iov_base = evt; iov.iov_len = sizeof(*evt); msg.msg_name = NULL; if (con->sock) { ret = kernel_sendmsg(con->sock, &msg, &iov, 1, sizeof(*evt)); if (ret == -EWOULDBLOCK || ret == 0) { cond_resched(); return; } else if (ret < 0) { return tipc_conn_close(con); } } else { tipc_topsrv_kern_evt(srv->net, evt); } /* Don't starve users filling buffers */ if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); count = 0; } spin_lock_bh(&con->outqueue_lock); list_del(&e->list); kfree(e); } spin_unlock_bh(&con->outqueue_lock); } static void tipc_conn_send_work(struct work_struct *work) { struct tipc_conn *con = container_of(work, struct tipc_conn, swork); if (connected(con)) tipc_conn_send_to_sock(con); conn_put(con); } /* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() */ void tipc_topsrv_queue_evt(struct net *net, int conid, u32 event, struct tipc_event *evt) { struct tipc_topsrv *srv = tipc_topsrv(net); struct outqueue_entry *e; struct tipc_conn *con; con = tipc_conn_lookup(srv, conid); if (!con) return; if (!connected(con)) goto err; e = kmalloc(sizeof(*e), GFP_ATOMIC); if (!e) goto err; e->inactive = (event == TIPC_SUBSCR_TIMEOUT); memcpy(&e->evt, evt, sizeof(*evt)); spin_lock_bh(&con->outqueue_lock); list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); if (queue_work(srv->send_wq, &con->swork)) return; err: 
conn_put(con); } /* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN * Indicates that there now is more space in the send buffer * The queued work is launched into tipc_send_work()->tipc_conn_send_to_sock() */ static void tipc_conn_write_space(struct sock *sk) { struct tipc_conn *con; read_lock_bh(&sk->sk_callback_lock); con = sk->sk_user_data; if (connected(con)) { conn_get(con); if (!queue_work(con->server->send_wq, &con->swork)) conn_put(con); } read_unlock_bh(&sk->sk_callback_lock); } static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, struct tipc_conn *con, struct tipc_subscr *s) { struct tipc_net *tn = tipc_net(srv->net); struct tipc_subscription *sub; u32 s_filter = tipc_sub_read(s, filter); if (s_filter & TIPC_SUB_CANCEL) { tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) { pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR); return -1; } sub = tipc_sub_subscribe(srv->net, s, con->conid); if (!sub) return -1; atomic_inc(&tn->subscription_count); spin_lock_bh(&con->sub_lock); list_add(&sub->sub_list, &con->sub_list); spin_unlock_bh(&con->sub_lock); return 0; } static int tipc_conn_rcv_from_sock(struct tipc_conn *con) { struct tipc_topsrv *srv = con->server; struct sock *sk = con->sock->sk; struct msghdr msg = {}; struct tipc_subscr s; struct kvec iov; int ret; iov.iov_base = &s; iov.iov_len = sizeof(s); msg.msg_name = NULL; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, iov.iov_len); ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); /* RACE: the connection can be closed in the meantime */ if (likely(connected(con))) ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); if (!ret) return 0; } tipc_conn_close(con); return ret; } static void tipc_conn_recv_work(struct work_struct *work) { 
struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); int count = 0; while (connected(con)) { if (tipc_conn_rcv_from_sock(con)) break; /* Don't flood Rx machine */ if (++count >= MAX_RECV_MSG_COUNT) { cond_resched(); count = 0; } } conn_put(con); } /* tipc_conn_data_ready - interrupt callback indicating the socket has data * The queued work is launched into tipc_recv_work()->tipc_conn_rcv_from_sock() */ static void tipc_conn_data_ready(struct sock *sk) { struct tipc_conn *con; trace_sk_data_ready(sk); read_lock_bh(&sk->sk_callback_lock); con = sk->sk_user_data; if (connected(con)) { conn_get(con); if (!queue_work(con->server->rcv_wq, &con->rwork)) conn_put(con); } read_unlock_bh(&sk->sk_callback_lock); } static void tipc_topsrv_accept(struct work_struct *work) { struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); struct socket *newsock, *lsock; struct tipc_conn *con; struct sock *newsk; int ret; spin_lock_bh(&srv->idr_lock); if (!srv->listener) { spin_unlock_bh(&srv->idr_lock); return; } lsock = srv->listener; spin_unlock_bh(&srv->idr_lock); while (1) { ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); return; } /* Register callbacks */ newsk = newsock->sk; write_lock_bh(&newsk->sk_callback_lock); newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); conn_put(con); } } /* tipc_topsrv_listener_data_ready - interrupt callback with connection request * The queued job is launched into tipc_topsrv_accept() */ static void tipc_topsrv_listener_data_ready(struct sock *sk) { struct tipc_topsrv *srv; trace_sk_data_ready(sk); read_lock_bh(&sk->sk_callback_lock); srv = sk->sk_user_data; if (srv) queue_work(srv->rcv_wq, &srv->awork); 
read_unlock_bh(&sk->sk_callback_lock); } static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) { struct socket *lsock = NULL; struct sockaddr_tipc saddr; struct sock *sk; int rc; rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock); if (rc < 0) return rc; srv->listener = lsock; sk = lsock->sk; write_lock_bh(&sk->sk_callback_lock); sk->sk_data_ready = tipc_topsrv_listener_data_ready; sk->sk_user_data = srv; write_unlock_bh(&sk->sk_callback_lock); lock_sock(sk); rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE); release_sock(sk); if (rc < 0) goto err; saddr.family = AF_TIPC; saddr.addrtype = TIPC_SERVICE_RANGE; saddr.addr.nameseq.type = TIPC_TOP_SRV; saddr.addr.nameseq.lower = TIPC_TOP_SRV; saddr.addr.nameseq.upper = TIPC_TOP_SRV; saddr.scope = TIPC_NODE_SCOPE; rc = tipc_sk_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); if (rc < 0) goto err; rc = kernel_listen(lsock, 0); if (rc < 0) goto err; /* As server's listening socket owner and creator is the same module, * we have to decrease TIPC module reference count to guarantee that * it remains zero after the server socket is created, otherwise, * executing "rmmod" command is unable to make TIPC module deleted * after TIPC module is inserted successfully. * * However, the reference count is ever increased twice in * sock_create_kern(): one is to increase the reference count of owner * of TIPC socket's proto_ops struct; another is to increment the * reference count of owner of TIPC proto struct. Therefore, we must * decrement the module reference count twice to ensure that it keeps * zero after server's listening socket is created. Of course, we * must bump the module reference count twice as well before the socket * is closed. 
*/ module_put(lsock->ops->owner); module_put(sk->sk_prot_creator->owner); return 0; err: sock_release(lsock); return -EINVAL; } bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, u32 upper, u32 filter, int *conid) { struct tipc_subscr sub; struct tipc_conn *con; int rc; sub.seq.type = type; sub.seq.lower = lower; sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); if (rc) conn_put(con); conn_put(con); return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) { struct tipc_conn *con; con = tipc_conn_lookup(tipc_topsrv(net), conid); if (!con) return; test_and_clear_bit(CF_CONNECTED, &con->flags); tipc_conn_delete_sub(con, NULL); conn_put(con); conn_put(con); } static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) { u32 port = *(u32 *)&evt->s.usr_handle; u32 self = tipc_own_addr(net); struct sk_buff_head evtq; struct sk_buff *skb; skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), self, self, port, port, 0); if (!skb) return; msg_set_dest_droppable(buf_msg(skb), true); memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); skb_queue_head_init(&evtq); __skb_queue_tail(&evtq, skb); tipc_loopback_trace(net, &evtq); tipc_sk_rcv(net, &evtq); } static int tipc_topsrv_work_start(struct tipc_topsrv *s) { s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); if (!s->rcv_wq) { pr_err("can't start tipc receive workqueue\n"); return -ENOMEM; } s->send_wq = alloc_ordered_workqueue("tipc_send", 0); if (!s->send_wq) { pr_err("can't start tipc send workqueue\n"); destroy_workqueue(s->rcv_wq); return -ENOMEM; } return 0; } static void tipc_topsrv_work_stop(struct tipc_topsrv *s) { destroy_workqueue(s->rcv_wq); destroy_workqueue(s->send_wq); } static int tipc_topsrv_start(struct net *net) { struct tipc_net *tn = 
tipc_net(net); const char name[] = "topology_server"; struct tipc_topsrv *srv; int ret; srv = kzalloc(sizeof(*srv), GFP_ATOMIC); if (!srv) return -ENOMEM; srv->net = net; INIT_WORK(&srv->awork, tipc_topsrv_accept); strscpy(srv->name, name, sizeof(srv->name)); tn->topsrv = srv; atomic_set(&tn->subscription_count, 0); spin_lock_init(&srv->idr_lock); idr_init(&srv->conn_idr); srv->idr_in_use = 0; ret = tipc_topsrv_work_start(srv); if (ret < 0) goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) goto err_create; return 0; err_create: tipc_topsrv_work_stop(srv); err_start: kfree(srv); return ret; } static void tipc_topsrv_stop(struct net *net) { struct tipc_topsrv *srv = tipc_topsrv(net); struct socket *lsock = srv->listener; struct tipc_conn *con; int id; spin_lock_bh(&srv->idr_lock); for (id = 0; srv->idr_in_use; id++) { con = idr_find(&srv->conn_idr, id); if (con) { conn_get(con); spin_unlock_bh(&srv->idr_lock); tipc_conn_close(con); conn_put(con); spin_lock_bh(&srv->idr_lock); } } __module_get(lsock->ops->owner); __module_get(lsock->sk->sk_prot_creator->owner); srv->listener = NULL; spin_unlock_bh(&srv->idr_lock); tipc_topsrv_work_stop(srv); sock_release(lsock); idr_destroy(&srv->conn_idr); kfree(srv); } int __net_init tipc_topsrv_init_net(struct net *net) { return tipc_topsrv_start(net); } void __net_exit tipc_topsrv_exit_net(struct net *net) { tipc_topsrv_stop(net); }
156 7 155 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 // SPDX-License-Identifier: GPL-2.0+ #include <linux/kernel.h> #include 
<linux/minmax.h> #include <drm/drm_blend.h> #include <drm/drm_rect.h> #include <drm/drm_fixed.h> #include "vkms_formats.h" /** * packed_pixels_offset() - Get the offset of the block containing the pixel at coordinates x/y * * @frame_info: Buffer metadata * @x: The x coordinate of the wanted pixel in the buffer * @y: The y coordinate of the wanted pixel in the buffer * @plane_index: The index of the plane to use * @offset: The returned offset inside the buffer of the block * @rem_x: The returned X coordinate of the requested pixel in the block * @rem_y: The returned Y coordinate of the requested pixel in the block * * As some pixel formats store multiple pixels in a block (DRM_FORMAT_R* for example), some * pixels are not individually addressable. This function return 3 values: the offset of the * whole block, and the coordinate of the requested pixel inside this block. * For example, if the format is DRM_FORMAT_R1 and the requested coordinate is 13,5, the offset * will point to the byte 5*pitches + 13/8 (second byte of the 5th line), and the rem_x/rem_y * coordinates will be (13 % 8, 5 % 1) = (5, 0) * * With this function, the caller just have to extract the correct pixel from the block. */ static void packed_pixels_offset(const struct vkms_frame_info *frame_info, int x, int y, int plane_index, int *offset, int *rem_x, int *rem_y) { struct drm_framebuffer *fb = frame_info->fb; const struct drm_format_info *format = frame_info->fb->format; /* Directly using x and y to multiply pitches and format->ccp is not sufficient because * in some formats a block can represent multiple pixels. * * Dividing x and y by the block size allows to extract the correct offset of the block * containing the pixel. 
*/ int block_x = x / drm_format_info_block_width(format, plane_index); int block_y = y / drm_format_info_block_height(format, plane_index); int block_pitch = fb->pitches[plane_index] * drm_format_info_block_height(format, plane_index); *rem_x = x % drm_format_info_block_width(format, plane_index); *rem_y = y % drm_format_info_block_height(format, plane_index); *offset = fb->offsets[plane_index] + block_y * block_pitch + block_x * format->char_per_block[plane_index]; } /** * packed_pixels_addr() - Get the pointer to the block containing the pixel at the given * coordinates * * @frame_info: Buffer metadata * @x: The x (width) coordinate inside the plane * @y: The y (height) coordinate inside the plane * @plane_index: The index of the plane * @addr: The returned pointer * @rem_x: The returned X coordinate of the requested pixel in the block * @rem_y: The returned Y coordinate of the requested pixel in the block * * Takes the information stored in the frame_info, a pair of coordinates, and returns the address * of the block containing this pixel and the pixel position inside this block. * * See @packed_pixels_offset for details about rem_x/rem_y behavior. */ static void packed_pixels_addr(const struct vkms_frame_info *frame_info, int x, int y, int plane_index, u8 **addr, int *rem_x, int *rem_y) { int offset; packed_pixels_offset(frame_info, x, y, plane_index, &offset, rem_x, rem_y); *addr = (u8 *)frame_info->map[0].vaddr + offset; } /** * get_block_step_bytes() - Common helper to compute the correct step value between each pixel block * to read in a certain direction. 
* * @fb: Framebuffer to iter on * @direction: Direction of the reading * @plane_index: Plane to get the step from * * As the returned count is the number of bytes between two consecutive blocks in a direction, * the caller may have to read multiple pixels before using the next one (for example, to read from * left to right in a DRM_FORMAT_R1 plane, each block contains 8 pixels, so the step must be used * only every 8 pixels). */ static int get_block_step_bytes(struct drm_framebuffer *fb, enum pixel_read_direction direction, int plane_index) { switch (direction) { case READ_LEFT_TO_RIGHT: return fb->format->char_per_block[plane_index]; case READ_RIGHT_TO_LEFT: return -fb->format->char_per_block[plane_index]; case READ_TOP_TO_BOTTOM: return (int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format, plane_index); case READ_BOTTOM_TO_TOP: return -(int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format, plane_index); } return 0; } /** * packed_pixels_addr_1x1() - Get the pointer to the block containing the pixel at the given * coordinates * * @frame_info: Buffer metadata * @x: The x (width) coordinate inside the plane * @y: The y (height) coordinate inside the plane * @plane_index: The index of the plane * @addr: The returned pointer * * This function can only be used with format where block_h == block_w == 1. */ static void packed_pixels_addr_1x1(const struct vkms_frame_info *frame_info, int x, int y, int plane_index, u8 **addr) { int offset, rem_x, rem_y; WARN_ONCE(drm_format_info_block_width(frame_info->fb->format, plane_index) != 1, "%s() only support formats with block_w == 1", __func__); WARN_ONCE(drm_format_info_block_height(frame_info->fb->format, plane_index) != 1, "%s() only support formats with block_h == 1", __func__); packed_pixels_offset(frame_info, x, y, plane_index, &offset, &rem_x, &rem_y); *addr = (u8 *)frame_info->map[0].vaddr + offset; } /* * The following functions take pixel data (a, r, g, b, pixel, ...) 
and convert them to * &struct pixel_argb_u16 * * They are used in the `read_line`s functions to avoid duplicate work for some pixel formats. */ static struct pixel_argb_u16 argb_u16_from_u8888(u8 a, u8 r, u8 g, u8 b) { struct pixel_argb_u16 out_pixel; /* * The 257 is the "conversion ratio". This number is obtained by the * (2^16 - 1) / (2^8 - 1) division. Which, in this case, tries to get * the best color value in a pixel format with more possibilities. * A similar idea applies to others RGB color conversions. */ out_pixel.a = (u16)a * 257; out_pixel.r = (u16)r * 257; out_pixel.g = (u16)g * 257; out_pixel.b = (u16)b * 257; return out_pixel; } static struct pixel_argb_u16 argb_u16_from_u16161616(u16 a, u16 r, u16 g, u16 b) { struct pixel_argb_u16 out_pixel; out_pixel.a = a; out_pixel.r = r; out_pixel.g = g; out_pixel.b = b; return out_pixel; } static struct pixel_argb_u16 argb_u16_from_le16161616(__le16 a, __le16 r, __le16 g, __le16 b) { return argb_u16_from_u16161616(le16_to_cpu(a), le16_to_cpu(r), le16_to_cpu(g), le16_to_cpu(b)); } static struct pixel_argb_u16 argb_u16_from_RGB565(const __le16 *pixel) { struct pixel_argb_u16 out_pixel; s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31)); s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); u16 rgb_565 = le16_to_cpu(*pixel); s64 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f); s64 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f); s64 fp_b = drm_int2fixp(rgb_565 & 0x1f); out_pixel.a = (u16)0xffff; out_pixel.r = drm_fixp2int_round(drm_fixp_mul(fp_r, fp_rb_ratio)); out_pixel.g = drm_fixp2int_round(drm_fixp_mul(fp_g, fp_g_ratio)); out_pixel.b = drm_fixp2int_round(drm_fixp_mul(fp_b, fp_rb_ratio)); return out_pixel; } /* * The following functions are read_line function for each pixel format supported by VKMS. * * They read a line starting at the point @x_start,@y_start following the @direction. The result * is stored in @out_pixel and in the format ARGB16161616. 
* * These functions are very repetitive, but the innermost pixel loops must be kept inside these * functions for performance reasons. Some benchmarking was done in [1] where having the innermost * loop factored out of these functions showed a slowdown by a factor of three. * * [1]: https://lore.kernel.org/dri-devel/d258c8dc-78e9-4509-9037-a98f7f33b3a3@riseup.net/ */ static void ARGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { u8 *px = (u8 *)src_pixels; *out_pixel = argb_u16_from_u8888(px[3], px[2], px[1], px[0]); out_pixel += 1; src_pixels += step; } } static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { u8 *px = (u8 *)src_pixels; *out_pixel = argb_u16_from_u8888(255, px[2], px[1], px[0]); out_pixel += 1; src_pixels += step; } } static void ABGR8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { u8 *px = (u8 *)src_pixels; /* Switch blue and red pixels. 
*/ *out_pixel = argb_u16_from_u8888(px[3], px[0], px[1], px[2]); out_pixel += 1; src_pixels += step; } } static void ARGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { u16 *px = (u16 *)src_pixels; *out_pixel = argb_u16_from_u16161616(px[3], px[2], px[1], px[0]); out_pixel += 1; src_pixels += step; } } static void XRGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { __le16 *px = (__le16 *)src_pixels; *out_pixel = argb_u16_from_le16161616(cpu_to_le16(0xFFFF), px[2], px[1], px[0]); out_pixel += 1; src_pixels += step; } } static void RGB565_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, enum pixel_read_direction direction, int count, struct pixel_argb_u16 out_pixel[]) { struct pixel_argb_u16 *end = out_pixel + count; u8 *src_pixels; packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); while (out_pixel < end) { __le16 *px = (__le16 *)src_pixels; *out_pixel = argb_u16_from_RGB565(px); out_pixel += 1; src_pixels += step; } } /* * The following functions take one &struct pixel_argb_u16 and convert it to a specific format. * The result is stored in @out_pixel. 
* * They are used in vkms_writeback_row() to convert and store a pixel from the src_buffer to * the writeback buffer. */ static void argb_u16_to_ARGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { /* * This sequence below is important because the format's byte order is * in little-endian. In the case of the ARGB8888 the memory is * organized this way: * * | Addr | = blue channel * | Addr + 1 | = green channel * | Addr + 2 | = Red channel * | Addr + 3 | = Alpha channel */ out_pixel[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257); out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); } static void argb_u16_to_XRGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { out_pixel[3] = 0xff; out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); } static void argb_u16_to_ABGR8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { out_pixel[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257); out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->b, 257); out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->r, 257); } static void argb_u16_to_ARGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { __le16 *pixel = (__le16 *)out_pixel; pixel[3] = cpu_to_le16(in_pixel->a); pixel[2] = cpu_to_le16(in_pixel->r); pixel[1] = cpu_to_le16(in_pixel->g); pixel[0] = cpu_to_le16(in_pixel->b); } static void argb_u16_to_XRGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { __le16 *pixel = (__le16 *)out_pixel; pixel[3] = cpu_to_le16(0xffff); pixel[2] = cpu_to_le16(in_pixel->r); pixel[1] = cpu_to_le16(in_pixel->g); pixel[0] = cpu_to_le16(in_pixel->b); } static void argb_u16_to_RGB565(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { __le16 *pixel = (__le16 *)out_pixel; s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 
drm_int2fixp(31)); s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); s64 fp_r = drm_int2fixp(in_pixel->r); s64 fp_g = drm_int2fixp(in_pixel->g); s64 fp_b = drm_int2fixp(in_pixel->b); u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio)); u16 g = drm_fixp2int(drm_fixp_div(fp_g, fp_g_ratio)); u16 b = drm_fixp2int(drm_fixp_div(fp_b, fp_rb_ratio)); *pixel = cpu_to_le16(r << 11 | g << 5 | b); } /** * vkms_writeback_row() - Generic loop for all supported writeback format. It is executed just * after the blending to write a line in the writeback buffer. * * @wb: Job where to insert the final image * @src_buffer: Line to write * @y: Row to write in the writeback buffer */ void vkms_writeback_row(struct vkms_writeback_job *wb, const struct line_buffer *src_buffer, int y) { struct vkms_frame_info *frame_info = &wb->wb_frame_info; int x_dst = frame_info->dst.x1; u8 *dst_pixels; int rem_x, rem_y; packed_pixels_addr(frame_info, x_dst, y, 0, &dst_pixels, &rem_x, &rem_y); struct pixel_argb_u16 *in_pixels = src_buffer->pixels; int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels); for (size_t x = 0; x < x_limit; x++, dst_pixels += frame_info->fb->format->cpp[0]) wb->pixel_write(dst_pixels, &in_pixels[x]); } /** * get_pixel_read_line_function() - Retrieve the correct read_line function for a specific * format. The returned pointer is NULL for unsupported pixel formats. The caller must ensure that * the pointer is valid before using it in a vkms_plane_state. 
* * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) */ pixel_read_line_t get_pixel_read_line_function(u32 format) { switch (format) { case DRM_FORMAT_ARGB8888: return &ARGB8888_read_line; case DRM_FORMAT_XRGB8888: return &XRGB8888_read_line; case DRM_FORMAT_ABGR8888: return &ABGR8888_read_line; case DRM_FORMAT_ARGB16161616: return &ARGB16161616_read_line; case DRM_FORMAT_XRGB16161616: return &XRGB16161616_read_line; case DRM_FORMAT_RGB565: return &RGB565_read_line; default: /* * This is a bug in vkms_plane_atomic_check(). All the supported * format must: * - Be listed in vkms_formats in vkms_plane.c * - Have a pixel_read callback defined here */ pr_err("Pixel format %p4cc is not supported by VKMS planes. This is a kernel bug, atomic check must forbid this configuration.\n", &format); BUG(); } } /** * get_pixel_write_function() - Retrieve the correct write_pixel function for a specific format. * The returned pointer is NULL for unsupported pixel formats. The caller must ensure that the * pointer is valid before using it in a vkms_writeback_job. * * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) */ pixel_write_t get_pixel_write_function(u32 format) { switch (format) { case DRM_FORMAT_ARGB8888: return &argb_u16_to_ARGB8888; case DRM_FORMAT_XRGB8888: return &argb_u16_to_XRGB8888; case DRM_FORMAT_ABGR8888: return &argb_u16_to_ABGR8888; case DRM_FORMAT_ARGB16161616: return &argb_u16_to_ARGB16161616; case DRM_FORMAT_XRGB16161616: return &argb_u16_to_XRGB16161616; case DRM_FORMAT_RGB565: return &argb_u16_to_RGB565; default: /* * This is a bug in vkms_writeback_atomic_check. All the supported * format must: * - Be listed in vkms_wb_formats in vkms_writeback.c * - Have a pixel_write callback defined here */ pr_err("Pixel format %p4cc is not supported by VKMS writeback. This is a kernel bug, atomic check must forbid this configuration.\n", &format); BUG(); } }
1 3 2 1 3 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for DragonRise Inc. game controllers * * From what I have gathered, these devices are mass produced in China and are * distributed under several vendors. They often share the same design as * the original PlayStation DualShock controller. * * 0079:0006 "DragonRise Inc. Generic USB Joystick " * - tested with a Tesun USB-703 game controller. 
* * Copyright (c) 2009 Richard Walmsley <richwalm@gmail.com> */ /* */ #include <linux/input.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #ifdef CONFIG_DRAGONRISE_FF struct drff_device { struct hid_report *report; }; static int drff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct drff_device *drff = data; int strong, weak; strong = effect->u.rumble.strong_magnitude; weak = effect->u.rumble.weak_magnitude; dbg_hid("called with 0x%04x 0x%04x", strong, weak); if (strong || weak) { strong = strong * 0xff / 0xffff; weak = weak * 0xff / 0xffff; /* While reverse engineering this device, I found that when this value is set, it causes the strong rumble to function at a near maximum speed, so we'll bypass it. */ if (weak == 0x0a) weak = 0x0b; drff->report->field[0]->value[0] = 0x51; drff->report->field[0]->value[1] = 0x00; drff->report->field[0]->value[2] = weak; drff->report->field[0]->value[4] = strong; hid_hw_request(hid, drff->report, HID_REQ_SET_REPORT); drff->report->field[0]->value[0] = 0xfa; drff->report->field[0]->value[1] = 0xfe; } else { drff->report->field[0]->value[0] = 0xf3; drff->report->field[0]->value[1] = 0x00; } drff->report->field[0]->value[2] = 0x00; drff->report->field[0]->value[4] = 0x00; dbg_hid("running with 0x%02x 0x%02x", strong, weak); hid_hw_request(hid, drff->report, HID_REQ_SET_REPORT); return 0; } static int drff_init(struct hid_device *hid) { struct drff_device *drff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_first_entry(&hid->inputs, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report = 
list_first_entry(report_list, struct hid_report, list); if (report->maxfield < 1) { hid_err(hid, "no fields in the report\n"); return -ENODEV; } if (report->field[0]->report_count < 7) { hid_err(hid, "not enough values in the field\n"); return -ENODEV; } drff = kzalloc(sizeof(struct drff_device), GFP_KERNEL); if (!drff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, drff, drff_play); if (error) { kfree(drff); return error; } drff->report = report; drff->report->field[0]->value[0] = 0xf3; drff->report->field[0]->value[1] = 0x00; drff->report->field[0]->value[2] = 0x00; drff->report->field[0]->value[3] = 0x00; drff->report->field[0]->value[4] = 0x00; drff->report->field[0]->value[5] = 0x00; drff->report->field[0]->value[6] = 0x00; hid_hw_request(hid, drff->report, HID_REQ_SET_REPORT); hid_info(hid, "Force Feedback for DragonRise Inc. " "game controllers by Richard Walmsley <richwalm@gmail.com>\n"); return 0; } #else static inline int drff_init(struct hid_device *hid) { return 0; } #endif /* * The original descriptor of joystick with PID 0x0011, represented by DVTech PC * JS19. It seems both copied from another device and a result of confusion * either about the specification or about the program used to create the * descriptor. In any case, it's a wonder it works on Windows. 
* * Usage Page (Desktop), ; Generic desktop controls (01h) * Usage (Joystick), ; Joystick (04h, application collection) * Collection (Application), * Collection (Logical), * Report Size (8), * Report Count (5), * Logical Minimum (0), * Logical Maximum (255), * Physical Minimum (0), * Physical Maximum (255), * Usage (X), ; X (30h, dynamic value) * Usage (X), ; X (30h, dynamic value) * Usage (X), ; X (30h, dynamic value) * Usage (X), ; X (30h, dynamic value) * Usage (Y), ; Y (31h, dynamic value) * Input (Variable), * Report Size (4), * Report Count (1), * Logical Maximum (7), * Physical Maximum (315), * Unit (Degrees), * Usage (00h), * Input (Variable, Null State), * Unit, * Report Size (1), * Report Count (10), * Logical Maximum (1), * Physical Maximum (1), * Usage Page (Button), ; Button (09h) * Usage Minimum (01h), * Usage Maximum (0Ah), * Input (Variable), * Usage Page (FF00h), ; FF00h, vendor-defined * Report Size (1), * Report Count (10), * Logical Maximum (1), * Physical Maximum (1), * Usage (01h), * Input (Variable), * End Collection, * Collection (Logical), * Report Size (8), * Report Count (4), * Physical Maximum (255), * Logical Maximum (255), * Usage (02h), * Output (Variable), * End Collection, * End Collection */ /* Size of the original descriptor of the PID 0x0011 joystick */ #define PID0011_RDESC_ORIG_SIZE 101 /* Fixed report descriptor for PID 0x011 joystick */ static const __u8 pid0011_rdesc_fixed[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x04, /* Usage (Joystick), */ 0xA1, 0x01, /* Collection (Application), */ 0xA1, 0x02, /* Collection (Logical), */ 0x14, /* Logical Minimum (0), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x01, /* Input (Constant), */ 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ 0x95, 0x02, /* Report Count (2), */ 0x09, 0x30, /* Usage (X), */ 0x09, 0x31, /* Usage (Y), */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 
0x01, /* Input (Constant), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x0A, /* Report Count (10), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x0A, /* Usage Maximum (0Ah), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x0A, /* Report Count (10), */ 0x81, 0x01, /* Input (Constant), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static const __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { switch (hdev->product) { case 0x0011: if (*rsize == PID0011_RDESC_ORIG_SIZE) { *rsize = sizeof(pid0011_rdesc_fixed); return pid0011_rdesc_fixed; } break; } return rdesc; } #define map_abs(c) hid_map_usage(hi, usage, bit, max, EV_ABS, (c)) #define map_rel(c) hid_map_usage(hi, usage, bit, max, EV_REL, (c)) static int dr_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { switch (usage->hid) { /* * revert to the old hid-input behavior where axes * can be randomly assigned when hid->usage is reused. */ case HID_GD_X: case HID_GD_Y: case HID_GD_Z: case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ: if (field->flags & HID_MAIN_ITEM_RELATIVE) map_rel(usage->hid & 0xf); else map_abs(usage->hid & 0xf); return 1; } return 0; } static int dr_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; dev_dbg(&hdev->dev, "DragonRise Inc. 
HID hardware probe..."); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } switch (hdev->product) { case 0x0006: ret = drff_init(hdev); if (ret) { dev_err(&hdev->dev, "force feedback init failed\n"); hid_hw_stop(hdev); goto err; } break; } return 0; err: return ret; } static const struct hid_device_id dr_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006), }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0011), }, { } }; MODULE_DEVICE_TABLE(hid, dr_devices); static struct hid_driver dr_driver = { .name = "dragonrise", .id_table = dr_devices, .report_fixup = dr_report_fixup, .probe = dr_probe, .input_mapping = dr_input_mapping, }; module_hid_driver(dr_driver); MODULE_DESCRIPTION("Force feedback support for DragonRise Inc. game controllers"); MODULE_LICENSE("GPL");
64 112 64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 #undef TRACE_SYSTEM #define TRACE_SYSTEM irq_matrix #if !defined(_TRACE_IRQ_MATRIX_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_MATRIX_H #include <linux/tracepoint.h> struct irq_matrix; struct cpumap; DECLARE_EVENT_CLASS(irq_matrix_global, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix), TP_STRUCT__entry( __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_global_update, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->online_maps = matrix->online_maps; __entry->global_available = 
matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d online_maps=%d global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DECLARE_EVENT_CLASS(irq_matrix_cpu, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap), TP_STRUCT__entry( __field( int, bit ) __field( unsigned int, cpu ) __field( bool, online ) __field( unsigned int, available ) __field( unsigned int, allocated ) __field( unsigned int, managed ) __field( unsigned int, online_maps ) __field( unsigned int, global_available ) __field( unsigned int, global_reserved ) __field( unsigned int, total_allocated ) ), TP_fast_assign( __entry->bit = bit; __entry->cpu = cpu; __entry->online = cmap->online; __entry->available = cmap->available; __entry->allocated = cmap->allocated; __entry->managed = cmap->managed; __entry->online_maps = matrix->online_maps; __entry->global_available = matrix->global_available; __entry->global_reserved = matrix->global_reserved; __entry->total_allocated = matrix->total_allocated; ), TP_printk("bit=%d cpu=%u online=%d avl=%u alloc=%u managed=%u online_maps=%u global_avl=%u, global_rsvd=%u, total_alloc=%u", __entry->bit, __entry->cpu, __entry->online, __entry->available, __entry->allocated, __entry->managed, __entry->online_maps, __entry->global_available, __entry->global_reserved, __entry->total_allocated) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_online, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_offline, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_reserve, TP_PROTO(struct irq_matrix *matrix), TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global, irq_matrix_remove_reserved, TP_PROTO(struct irq_matrix *matrix), 
TP_ARGS(matrix) ); DEFINE_EVENT(irq_matrix_global_update, irq_matrix_assign_system, TP_PROTO(int bit, struct irq_matrix *matrix), TP_ARGS(bit, matrix) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_reserve_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_remove_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc_managed, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_assign, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_alloc, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); DEFINE_EVENT(irq_matrix_cpu, irq_matrix_free, TP_PROTO(int bit, unsigned int cpu, struct irq_matrix *matrix, struct cpumap *cmap), TP_ARGS(bit, cpu, matrix, cmap) ); #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
19 19 19 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 // SPDX-License-Identifier: GPL-2.0-only /* * linux/net/sunrpc/svc_xprt.c * * Author: Tom Tucker <tom@opengridcomputing.com> */ #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/errno.h> #include <linux/freezer.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> #include <linux/module.h> #include <linux/netdevice.h> #include <trace/events/sunrpc.h> #define RPCDBG_FACILITY RPCDBG_SVCXPRT static unsigned int svc_rpc_per_connection_limit __read_mostly; module_param(svc_rpc_per_connection_limit, uint, 0644); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(struct timer_list *t); static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after * 6 minutes * http://nfsv4bat.org/Documents/ConnectAThon/1996/nfstcp.pdf */ static int svc_conn_age_period = 6*60; /* List of registered transport classes */ static DEFINE_SPINLOCK(svc_xprt_class_lock); static LIST_HEAD(svc_xprt_class_list); /* SMP locking strategy: * * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. * The "service mutex" protects svc_serv->sv_nrthread. 
* svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being * enqueued multiply. During normal transport processing this bit * is set by svc_xprt_enqueue and cleared by svc_xprt_received. * Providers should not manipulate this bit directly. * * Some flags can be set to certain values at any time * providing that certain rules are followed: * * XPT_CONN, XPT_DATA: * - Can be set or cleared at any time. * - After a set, svc_xprt_enqueue must be called to enqueue * the transport for processing. * - After a clear, the transport must be read/accepted. * If this succeeds, it must be set again. * XPT_CLOSE: * - Can set at any time. It is never cleared. * XPT_DEAD: * - Can only be set while XPT_BUSY is held which ensures * that no other thread will be using the transport or will * try to set XPT_DEAD. */ /** * svc_reg_xprt_class - Register a server-side RPC transport class * @xcl: New transport class to be registered * * Returns zero on success; otherwise a negative errno is returned. 
*/ int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; int res = -EEXIST; INIT_LIST_HEAD(&xcl->xcl_list); spin_lock(&svc_xprt_class_lock); /* Make sure there isn't already a class with the same name */ list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) { if (strcmp(xcl->xcl_name, cl->xcl_name) == 0) goto out; } list_add_tail(&xcl->xcl_list, &svc_xprt_class_list); res = 0; out: spin_unlock(&svc_xprt_class_lock); return res; } EXPORT_SYMBOL_GPL(svc_reg_xprt_class); /** * svc_unreg_xprt_class - Unregister a server-side RPC transport class * @xcl: Transport class to be unregistered * */ void svc_unreg_xprt_class(struct svc_xprt_class *xcl) { spin_lock(&svc_xprt_class_lock); list_del_init(&xcl->xcl_list); spin_unlock(&svc_xprt_class_lock); } EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); /** * svc_print_xprts - Format the transport list for printing * @buf: target buffer for formatted address * @maxlen: length of target buffer * * Fills in @buf with a string containing a list of transport names, each name * terminated with '\n'. If the buffer is too small, some entries may be * missing, but it is guaranteed that all lines in the output buffer are * complete. * * Returns positive length of the filled-in string. */ int svc_print_xprts(char *buf, int maxlen) { struct svc_xprt_class *xcl; char tmpstr[80]; int len = 0; buf[0] = '\0'; spin_lock(&svc_xprt_class_lock); list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { int slen; slen = snprintf(tmpstr, sizeof(tmpstr), "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); if (slen >= sizeof(tmpstr) || len + slen >= maxlen) break; len += slen; strcat(buf, tmpstr); } spin_unlock(&svc_xprt_class_lock); return len; } /** * svc_xprt_deferred_close - Close a transport * @xprt: transport instance * * Used in contexts that need to defer the work of shutting down * the transport to an nfsd thread. 
*/ void svc_xprt_deferred_close(struct svc_xprt *xprt) { trace_svc_xprt_close(xprt); if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags)) svc_xprt_enqueue(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_deferred_close); static void svc_xprt_free(struct kref *kref) { struct svc_xprt *xprt = container_of(kref, struct svc_xprt, xpt_ref); struct module *owner = xprt->xpt_class->xcl_owner; if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_cred(xprt->xpt_cred); put_net_track(xprt->xpt_net, &xprt->ns_tracker); /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); if (xprt->xpt_bc_xps) xprt_switch_put(xprt->xpt_bc_xps); trace_svc_xprt_free(xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } void svc_xprt_put(struct svc_xprt *xprt) { kref_put(&xprt->xpt_ref, svc_xprt_free); } EXPORT_SYMBOL_GPL(svc_xprt_put); /* * Called by transport drivers to initialize the transport independent * portion of the transport instance. */ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, struct svc_xprt *xprt, struct svc_serv *serv) { memset(xprt, 0, sizeof(*xprt)); xprt->xpt_class = xcl; xprt->xpt_ops = xcl->xcl_ops; kref_init(&xprt->xpt_ref); xprt->xpt_server = serv; INIT_LIST_HEAD(&xprt->xpt_list); INIT_LIST_HEAD(&xprt->xpt_deferred); INIT_LIST_HEAD(&xprt->xpt_users); mutex_init(&xprt->xpt_mutex); spin_lock_init(&xprt->xpt_lock); set_bit(XPT_BUSY, &xprt->xpt_flags); xprt->xpt_net = get_net_track(net, &xprt->ns_tracker, GFP_ATOMIC); strcpy(xprt->xpt_remotebuf, "uninitialized"); } EXPORT_SYMBOL_GPL(svc_xprt_init); /** * svc_xprt_received - start next receiver thread * @xprt: controlling transport * * The caller must hold the XPT_BUSY bit and must * not thereafter touch transport data. * * Note: XPT_DATA only gets cleared when a read-attempt finds no (or * insufficient) data. 
*/
void svc_xprt_received(struct svc_xprt *xprt)
{
	if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) {
		/*
		 * NOTE(review): this branch is taken when XPT_BUSY is
		 * *not* set, so the message wording looks inverted.
		 */
		WARN_ONCE(1, "xprt=0x%p already busy!", xprt);
		return;
	}

	/* As soon as we clear busy, the xprt could be closed and
	 * 'put', so we need a reference to call svc_xprt_enqueue with:
	 */
	svc_xprt_get(xprt);
	smp_mb__before_atomic();
	clear_bit(XPT_BUSY, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_received);

/*
 * Add @new to @serv's list of permanent transports: clear XPT_TEMP,
 * link it onto sv_permsocks under sv_lock, then release XPT_BUSY via
 * svc_xprt_received().
 */
void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
{
	clear_bit(XPT_TEMP, &new->xpt_flags);
	spin_lock_bh(&serv->sv_lock);
	list_add(&new->xpt_list, &serv->sv_permsocks);
	spin_unlock_bh(&serv->sv_lock);
	svc_xprt_received(new);
}

/*
 * Look up the transport class named @xprt_name and ask it to create a
 * listener for @sap.
 *
 * Returns the new transport's local port on success, the error from
 * the class's xpo_create method, or -EPROTONOSUPPORT if no class with
 * that name is registered or its module reference cannot be taken.
 */
static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
			    struct net *net, struct sockaddr *sap,
			    size_t len, int flags, const struct cred *cred)
{
	struct svc_xprt_class *xcl;

	spin_lock(&svc_xprt_class_lock);
	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
		struct svc_xprt *newxprt;
		unsigned short newport;

		if (strcmp(xprt_name, xcl->xcl_name))
			continue;

		if (!try_module_get(xcl->xcl_owner))
			goto err;

		/* The class lock is dropped before calling into the class */
		spin_unlock(&svc_xprt_class_lock);
		newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
		if (IS_ERR(newxprt)) {
			trace_svc_xprt_create_err(serv->sv_programs->pg_name,
						  xcl->xcl_name, sap, len,
						  newxprt);
			module_put(xcl->xcl_owner);
			return PTR_ERR(newxprt);
		}
		newxprt->xpt_cred = get_cred(cred);
		svc_add_new_perm_xprt(serv, newxprt);
		newport = svc_xprt_local_port(newxprt);
		return newport;
	}
 err:
	spin_unlock(&svc_xprt_class_lock);
	/* This errno is exposed to user space.  Provide a reasonable
	 * perror msg for a bad transport.
	 */
	return -EPROTONOSUPPORT;
}

/**
 * svc_xprt_create_from_sa - Add a new listener to @serv from socket address
 * @serv: target RPC service
 * @xprt_name: transport class name
 * @net: network namespace
 * @sap: socket address pointer
 * @flags: SVC_SOCK flags
 * @cred: credential to bind to this transport
 *
 * If no class named @xprt_name is available on the first attempt, the
 * module "svc<xprt_name>" is requested and creation is retried once.
 *
 * Return: local xprt port on success; %-EAFNOSUPPORT if the address
 * family of @sap is not handled, %-EPROTONOSUPPORT if no usable
 * transport class was found, or the error returned by the class's
 * create method.
 */
int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name,
			    struct net *net, struct sockaddr *sap,
			    int flags, const struct cred *cred)
{
	size_t len;
	int err;

	switch (sap->sa_family) {
	case AF_INET:
		len = sizeof(struct sockaddr_in);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		len = sizeof(struct sockaddr_in6);
		break;
#endif
	default:
		return -EAFNOSUPPORT;
	}

	err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, cred);
	if (err == -EPROTONOSUPPORT) {
		request_module("svc%s", xprt_name);
		err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags,
				       cred);
	}

	return err;
}
EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa);

/**
 * svc_xprt_create - Add a new listener to @serv
 * @serv: target RPC service
 * @xprt_name: transport class name
 * @net: network namespace
 * @family: network address family
 * @port: listener port
 * @flags: SVC_SOCK flags
 * @cred: credential to bind to this transport
 *
 * Builds a wildcard ("any") address for @family and @port and passes
 * it to svc_xprt_create_from_sa().
 *
 * Return: local xprt port on success; %-EAFNOSUPPORT if @family is not
 * handled, otherwise whatever svc_xprt_create_from_sa() returns.
 */
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
		    struct net *net, const int family,
		    const unsigned short port, int flags,
		    const struct cred *cred)
{
	struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};
#if IS_ENABLED(CONFIG_IPV6)
	struct sockaddr_in6 sin6 = {
		.sin6_family		= AF_INET6,
		.sin6_addr		= IN6ADDR_ANY_INIT,
		.sin6_port		= htons(port),
	};
#endif
	struct sockaddr *sap;

	switch (family) {
	case PF_INET:
		sap = (struct sockaddr *)&sin;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case PF_INET6:
		sap = (struct sockaddr *)&sin6;
		break;
#endif
	default:
		return -EAFNOSUPPORT;
	}

	return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred);
}
EXPORT_SYMBOL_GPL(svc_xprt_create);

/*
 * Copy the local and remote xprt addresses to the rqstp structure
 */
void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
	rqstp->rq_addrlen = xprt->xpt_remotelen;

	/*
	 * Destination address in request is needed for binding the
	 * source address in RPC replies/callbacks later.
	 */
	memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen);
	rqstp->rq_daddrlen = xprt->xpt_locallen;
}
EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);

/**
 * svc_print_addr - Format rq_addr field for printing
 * @rqstp: svc_rqst struct containing address to print
 * @buf: target buffer for formatted address
 * @len: length of target buffer
 *
 * Return: the value returned by __svc_print_addr().
 */
char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
{
	return __svc_print_addr(svc_addr(rqstp), buf, len);
}
EXPORT_SYMBOL_GPL(svc_print_addr);

/*
 * Report whether @xprt may take on another request: always true when
 * svc_rpc_per_connection_limit is 0, otherwise true while the current
 * request count is non-negative and below the limit.
 */
static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
{
	unsigned int limit = svc_rpc_per_connection_limit;
	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);

	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
}

/*
 * Account @rqstp against @xprt's request count unless it already holds
 * a slot (RQ_DATA set). Returns false if the transport is at its limit.
 */
static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
		if (!svc_xprt_slots_in_range(xprt))
			return false;
		atomic_inc(&xprt->xpt_nr_rqsts);
		set_bit(RQ_DATA, &rqstp->rq_flags);
	}
	return true;
}

/*
 * Give back the slot taken by svc_xprt_reserve_slot(), if any, and
 * re-evaluate whether the transport can now be queued.
 */
static void svc_xprt_release_slot(struct svc_rqst *rqstp)
{
	struct svc_xprt	*xprt = rqstp->rq_xprt;

	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
		atomic_dec(&xprt->xpt_nr_rqsts);
		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
		svc_xprt_enqueue(xprt);
	}
}

/*
 * Decide, from a single snapshot of xpt_flags, whether @xprt has work
 * that a service thread could act on right now.
 */
static bool svc_xprt_ready(struct svc_xprt *xprt)
{
	unsigned long xpt_flags;

	/*
	 * If another cpu has recently updated xpt_flags,
	 * sk_sock->flags, xpt_reserved, or xpt_nr_rqsts, we need to
	 * know about it; otherwise it's possible that both that cpu and
	 * this one could call svc_xprt_enqueue() without either
	 * svc_xprt_enqueue() recognizing that the conditions below
	 * are satisfied, and we could stall indefinitely:
	 */
	smp_rmb();
	xpt_flags = READ_ONCE(xprt->xpt_flags);

	trace_svc_xprt_enqueue(xprt, xpt_flags);
	/* A busy transport is never reported as ready */
	if (xpt_flags & BIT(XPT_BUSY))
		return false;
	/* Connect, close and handshake work needs no write space or slot */
	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
		return true;
	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
		    svc_xprt_slots_in_range(xprt))
			return true;
		trace_svc_xprt_no_write_space(xprt);
		return false;
	}
	return false;
}

/**
 * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
 * @xprt: transport with data pending
 *
 * Does nothing if the transport is not ready or is already busy.
 */
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
	struct svc_pool *pool;

	if (!svc_xprt_ready(xprt))
		return;

	/* Mark transport as busy. It will remain in this state until
	 * the provider calls svc_xprt_received. We update XPT_BUSY
	 * atomically because it also guards against trying to enqueue
	 * the transport twice.
	 */
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		return;

	pool = svc_pool_for_cpu(xprt->xpt_server);

	percpu_counter_inc(&pool->sp_sockets_queued);
	xprt->xpt_qtime = ktime_get();
	lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts);

	svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);

/*
 * Dequeue the first transport, if there is one.
 * A reference is taken on the transport that is returned.
 */
static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
{
	struct svc_xprt	*xprt = NULL;

	xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready);
	if (xprt)
		svc_xprt_get(xprt);
	return xprt;
}

/**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp:  The request in question
 * @space: new max space to reserve
 *
 * Each request reserves some space on the output queue of the transport
 * to make sure the reply fits.  This function reduces that reserved
 * space to be the amount of space used already, plus @space.
*
 */
void svc_reserve(struct svc_rqst *rqstp, int space)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;

	/* Count what has already been placed in the reply head */
	space += rqstp->rq_res.head[0].iov_len;

	/* The reservation is only ever reduced here, never grown */
	if (xprt && space < rqstp->rq_reserved) {
		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
		rqstp->rq_reserved = space;
		smp_wmb(); /* See smp_rmb() in svc_xprt_ready() */
		svc_xprt_enqueue(xprt);
	}
}
EXPORT_SYMBOL_GPL(svc_reserve);

/*
 * Release the transport context held by a deferred request and free
 * the request itself. A NULL @dr is ignored.
 */
static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
{
	if (!dr)
		return;

	xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
	kfree(dr);
}

/*
 * Detach @rqstp from its transport once a request has been handled:
 * release the transport context, any deferred request and the pages,
 * drop the reply-space reservation and the request slot, then put the
 * reference on the transport.
 */
static void svc_xprt_release(struct svc_rqst *rqstp)
{
	struct svc_xprt	*xprt = rqstp->rq_xprt;

	xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
	rqstp->rq_xprt_ctxt = NULL;

	free_deferred(xprt, rqstp->rq_deferred);
	rqstp->rq_deferred = NULL;

	svc_rqst_release_pages(rqstp);
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.page_base = 0;

	/* Reset response buffer and release
	 * the reservation.
	 * But first, check that enough space was reserved
	 * for the reply, otherwise we have a bug!
	 */
	if ((rqstp->rq_res.len) >  rqstp->rq_reserved)
		printk(KERN_ERR "RPC request reserved %d but used %d\n",
		       rqstp->rq_reserved,
		       rqstp->rq_res.len);

	rqstp->rq_res.head[0].iov_len = 0;
	svc_reserve(rqstp, 0);
	svc_xprt_release_slot(rqstp);
	rqstp->rq_xprt = NULL;
	svc_xprt_put(xprt);
}

/**
 * svc_wake_up - Wake up a service thread for non-transport work
 * @serv: RPC service
 *
 * Some svc_serv's will have occasional work to do, even when a xprt is not
 * waiting to be serviced. This function is there to "kick" a task in one of
 * those services so that it can wake up and do that work. Note that we only
 * bother with pool 0 as we don't need to wake up more than one thread for
 * this purpose.
*/
void svc_wake_up(struct svc_serv *serv)
{
	struct svc_pool *pool = &serv->sv_pools[0];

	/* Checked by svc_thread_should_sleep(), cleared in svc_recv() */
	set_bit(SP_TASK_PENDING, &pool->sp_flags);
	svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);

/*
 * Return non-zero if the port in @sin is below PROT_SOCK. Address
 * families other than AF_INET and AF_INET6 always yield 0.
 */
int svc_port_is_privileged(struct sockaddr *sin)
{
	switch (sin->sa_family) {
	case AF_INET:
		return ntohs(((struct sockaddr_in *)sin)->sin_port)
			< PROT_SOCK;
	case AF_INET6:
		return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
			< PROT_SOCK;
	default:
		return 0;
	}
}

/*
 * Make sure that we don't have too many connections that have not yet
 * demonstrated that they have access to the NFS server. If we have,
 * something must be dropped. It's not clear what will happen if we allow
 * "too many" connections, but when dealing with network-facing software,
 * we have to code defensively. Here we do that by imposing hard limits.
 *
 * There's no point in trying to do random drop here for DoS
 * prevention. An NFS client does 1 reconnect in 15 seconds. An
 * attacker can easily beat that.
 *
 * The only somewhat efficient mechanism would be to drop old
 * connections from the same IP first. But right now we don't even
 * record the client IP in svc_sock.
 */
static void svc_check_conn_limits(struct svc_serv *serv)
{
	if (serv->sv_tmpcnt > XPT_MAX_TMP_CONN) {
		struct svc_xprt *xprt = NULL, *xprti;
		spin_lock_bh(&serv->sv_lock);
		if (!list_empty(&serv->sv_tempsocks)) {
			/*
			 * Always select the oldest connection. It's not fair,
			 * but nor is life.
			 */
			list_for_each_entry_reverse(xprti, &serv->sv_tempsocks,
						    xpt_list) {
				if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) {
					xprt = xprti;
					set_bit(XPT_CLOSE, &xprt->xpt_flags);
					/* Keep xprt alive past the unlock */
					svc_xprt_get(xprt);
					break;
				}
			}
		}
		spin_unlock_bh(&serv->sv_lock);

		if (xprt) {
			svc_xprt_enqueue(xprt);
			svc_xprt_put(xprt);
		}
	}
}

/*
 * Fill rqstp->rq_pages and point rq_arg at them, ready to receive a
 * request. Allocation is retried until every page is present.
 *
 * Returns false if the thread was asked to stop while waiting for
 * memory, true otherwise.
 */
static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
	struct xdr_buf *arg = &rqstp->rq_arg;
	unsigned long pages, filled, ret;

	pages = rqstp->rq_maxpages;
	for (filled = 0; filled < pages; filled = ret) {
		ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
		if (ret > filled)
			/* Made progress, don't sleep yet */
			continue;

		set_current_state(TASK_IDLE);
		if (svc_thread_should_stop(rqstp)) {
			set_current_state(TASK_RUNNING);
			return false;
		}
		trace_svc_alloc_arg_err(pages, ret);
		memalloc_retry_wait(GFP_KERNEL);
	}
	rqstp->rq_page_end = &rqstp->rq_pages[pages];
	rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */

	/* Make arg->head point to first page and arg->pages point to rest */
	arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
	arg->head[0].iov_len = PAGE_SIZE;
	arg->pages = rqstp->rq_pages + 1;
	arg->page_base = 0;
	/* save at least one page for response */
	arg->page_len = (pages-2)*PAGE_SIZE;
	arg->len = (pages-1)*PAGE_SIZE;
	arg->tail[0].iov_len = 0;

	rqstp->rq_xid = xdr_zero;
	return true;
}

/*
 * Return true only if there is nothing for this thread to do: no
 * pending task, no queued transport, no stop request and (for a
 * backchannel thread) no queued callback.
 */
static bool
svc_thread_should_sleep(struct svc_rqst *rqstp)
{
	struct svc_pool		*pool = rqstp->rq_pool;

	/* did someone call svc_wake_up? */
	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
		return false;

	/* was a socket queued? */
	if (!lwq_empty(&pool->sp_xprts))
		return false;

	/* are we shutting down? */
	if (svc_thread_should_stop(rqstp))
		return false;

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	if (svc_is_backchannel(rqstp)) {
		if (!lwq_empty(&rqstp->rq_server->sv_cb_list))
			return false;
	}
#endif

	return true;
}

/*
 * Put the thread on the pool's idle list and sleep until there is
 * work, or just yield the CPU if work is already available.
 */
static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
{
	struct svc_pool *pool = rqstp->rq_pool;

	if (svc_thread_should_sleep(rqstp)) {
		set_current_state(TASK_IDLE | TASK_FREEZABLE);
		llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
		/* Re-check now that this thread is visible as idle */
		if (likely(svc_thread_should_sleep(rqstp)))
			schedule();

		while (!llist_del_first_this(&pool->sp_idle_threads,
					     &rqstp->rq_idle)) {
			/* Work just became available.  This thread can only
			 * handle it after removing rqstp from the idle
			 * list. If that attempt failed, some other thread
			 * must have queued itself after finding no
			 * work to do, so that thread has taken responsibility
			 * for this new work.  This thread can safely sleep
			 * until woken again.
			 */
			schedule();
			set_current_state(TASK_IDLE | TASK_FREEZABLE);
		}
		__set_current_state(TASK_RUNNING);
	} else {
		cond_resched();
	}
	try_to_freeze();
}

/*
 * Add a newly accepted transport to @serv's list of temporary
 * transports, arm the ageing timer on first use, and release XPT_BUSY
 * via svc_xprt_received().
 */
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
{
	spin_lock_bh(&serv->sv_lock);
	set_bit(XPT_TEMP, &newxpt->xpt_flags);
	list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
	serv->sv_tmpcnt++;
	if (serv->sv_temptimer.function == NULL) {
		/* setup timer to age temp transports */
		serv->sv_temptimer.function = svc_age_temp_xprts;
		mod_timer(&serv->sv_temptimer,
			  jiffies + svc_conn_age_period * HZ);
	}
	spin_unlock_bh(&serv->sv_lock);
	svc_xprt_received(newxpt);
}

/*
 * Perform whatever work is pending on a dequeued transport: close it,
 * accept a new connection on a listener, run a handshake, or receive
 * and process one request. Always finishes by releasing @rqstp's hold
 * on the transport.
 */
static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	struct svc_serv *serv = rqstp->rq_server;
	int len = 0;

	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
		if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
			xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
		svc_delete_xprt(xprt);
		/* Leave XPT_BUSY set on the dead xprt: */
		goto out;
	}
	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
		struct svc_xprt *newxpt;
		/*
		 * We know this module_get will succeed because the
		 * listener holds a reference too
		 */
		__module_get(xprt->xpt_class->xcl_owner);
		svc_check_conn_limits(xprt->xpt_server);
		newxpt = xprt->xpt_ops->xpo_accept(xprt);
		if (newxpt) {
			newxpt->xpt_cred = get_cred(xprt->xpt_cred);
			svc_add_new_temp_xprt(serv, newxpt);
			trace_svc_xprt_accept(newxpt, serv->sv_name);
		} else {
			/* Nothing accepted: undo the module reference */
			module_put(xprt->xpt_class->xcl_owner);
		}
		svc_xprt_received(xprt);
	} else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
		xprt->xpt_ops->xpo_handshake(xprt);
		svc_xprt_received(xprt);
	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
		/* XPT_DATA|XPT_DEFERRED case: */
		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
		if (rqstp->rq_deferred)
			len = svc_deferred_recv(rqstp);
		else
			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
		if (len <= 0)
			goto out;

		trace_svc_xdr_recvfrom(&rqstp->rq_arg);

		clear_bit(XPT_OLD, &xprt->xpt_flags);

		rqstp->rq_chandle.defer = svc_defer;

		if (serv->sv_stats)
			serv->sv_stats->netcnt++;
		percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
		rqstp->rq_stime = ktime_get();
		svc_process(rqstp);
	} else
		svc_xprt_received(xprt);

out:
	rqstp->rq_res.len = 0;
	svc_xprt_release(rqstp);
}

/* Wake another idle thread if this pool still has work outstanding. */
static void svc_thread_wake_next(struct svc_rqst *rqstp)
{
	if (!svc_thread_should_sleep(rqstp))
		/* More work pending after I dequeued some,
		 * wake another worker
		 */
		svc_pool_wake_idle_thread(rqstp->rq_pool);
}

/**
 * svc_recv - Receive and process the next request on any transport
 * @rqstp: an idle RPC service thread
 *
 * This code is carefully organised not to touch any cachelines in
 * the shared svc_serv structure, only cachelines in the local
 * svc_pool.
*/
void svc_recv(struct svc_rqst *rqstp)
{
	struct svc_pool *pool = rqstp->rq_pool;

	/* Fails only if the thread was told to stop while allocating */
	if (!svc_alloc_arg(rqstp))
		return;

	svc_thread_wait_for_work(rqstp);

	clear_bit(SP_TASK_PENDING, &pool->sp_flags);

	if (svc_thread_should_stop(rqstp)) {
		/* Hand any remaining work to another thread before leaving */
		svc_thread_wake_next(rqstp);
		return;
	}

	rqstp->rq_xprt = svc_xprt_dequeue(pool);
	if (rqstp->rq_xprt) {
		struct svc_xprt *xprt = rqstp->rq_xprt;

		svc_thread_wake_next(rqstp);
		/* Normally we will wait up to 5 seconds for any required
		 * cache information to be provided.  When there are no
		 * idle threads, we reduce the wait time.
		 */
		if (pool->sp_idle_threads.first)
			rqstp->rq_chandle.thread_wait = 5 * HZ;
		else
			rqstp->rq_chandle.thread_wait = 1 * HZ;

		trace_svc_xprt_dequeue(rqstp);
		svc_handle_xprt(rqstp, xprt);
	}

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
	if (svc_is_backchannel(rqstp)) {
		struct svc_serv *serv = rqstp->rq_server;
		struct rpc_rqst *req;

		/* Backchannel threads also service queued callback requests */
		req = lwq_dequeue(&serv->sv_cb_list,
				  struct rpc_rqst, rq_bc_list);
		if (req) {
			svc_thread_wake_next(rqstp);
			svc_process_bc(req, rqstp);
		}
	}
#endif
}
EXPORT_SYMBOL_GPL(svc_recv);

/**
 * svc_send - Return reply to client
 * @rqstp: RPC transaction context
 *
 * Computes the total length of the reply buffer and hands the reply to
 * the transport's xpo_sendto method. The send status is only traced;
 * it is not returned to the caller.
 */
void svc_send(struct svc_rqst *rqstp)
{
	struct svc_xprt	*xprt;
	struct xdr_buf	*xb;
	int status;

	xprt = rqstp->rq_xprt;

	/* calculate over-all length */
	xb = &rqstp->rq_res;
	xb->len = xb->head[0].iov_len +
		xb->page_len +
		xb->tail[0].iov_len;
	trace_svc_xdr_sendto(rqstp->rq_xid, xb);
	trace_svc_stats_latency(rqstp);

	status = xprt->xpt_ops->xpo_sendto(rqstp);

	trace_svc_send(rqstp, status);
}

/*
 * Timer function to close old temporary transports, using
 * a mark-and-sweep algorithm.
*/
static void svc_age_temp_xprts(struct timer_list *t)
{
	struct svc_serv *serv = timer_container_of(serv, t, sv_temptimer);
	struct svc_xprt *xprt;
	struct list_head *le, *next;

	dprintk("svc_age_temp_xprts\n");

	if (!spin_trylock_bh(&serv->sv_lock)) {
		/* busy, try again 1 sec later */
		dprintk("svc_age_temp_xprts: busy\n");
		mod_timer(&serv->sv_temptimer, jiffies + HZ);
		return;
	}

	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		xprt = list_entry(le, struct svc_xprt, xpt_list);

		/* First time through, just mark it OLD. Second time
		 * through, close it.
		 */
		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
			continue;
		/* Skip transports that are referenced elsewhere or busy */
		if (kref_read(&xprt->xpt_ref) > 1 ||
		    test_bit(XPT_BUSY, &xprt->xpt_flags))
			continue;
		list_del_init(le);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		dprintk("queuing xprt %p for closing\n", xprt);

		/* a thread will dequeue and close it soon */
		svc_xprt_enqueue(xprt);
	}
	spin_unlock_bh(&serv->sv_lock);

	/* Re-arm for the next ageing pass */
	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}

/* Close temporary transports whose xpt_local matches server_addr immediately
 * instead of waiting for them to be picked up by the timer.
 *
 * This is meant to be called from a notifier_block that runs when an ip
 * address is deleted.
*/
void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
{
	struct svc_xprt *xprt;
	struct list_head *le, *next;
	LIST_HEAD(to_be_closed);

	/* Pass 1: under sv_lock, move matching transports to a private list */
	spin_lock_bh(&serv->sv_lock);
	list_for_each_safe(le, next, &serv->sv_tempsocks) {
		xprt = list_entry(le, struct svc_xprt, xpt_list);
		if (rpc_cmp_addr(server_addr, (struct sockaddr *)
				&xprt->xpt_local)) {
			dprintk("svc_age_temp_xprts_now: found %p\n", xprt);
			list_move(le, &to_be_closed);
		}
	}
	spin_unlock_bh(&serv->sv_lock);

	/* Pass 2: without the lock, flag each one for closing and queue it */
	while (!list_empty(&to_be_closed)) {
		le = to_be_closed.next;
		list_del_init(le);
		xprt = list_entry(le, struct svc_xprt, xpt_list);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		set_bit(XPT_KILL_TEMP, &xprt->xpt_flags);
		dprintk("svc_age_temp_xprts_now: queuing xprt %p for closing\n",
				xprt);
		svc_xprt_enqueue(xprt);
	}
}
EXPORT_SYMBOL_GPL(svc_age_temp_xprts_now);

/*
 * Unlink and invoke every callback registered on xprt->xpt_users.
 * The callbacks run with xpt_lock held.
 */
static void call_xpt_users(struct svc_xprt *xprt)
{
	struct svc_xpt_user *u;

	spin_lock(&xprt->xpt_lock);
	while (!list_empty(&xprt->xpt_users)) {
		u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
		list_del_init(&u->list);
		u->callback(u);
	}
	spin_unlock(&xprt->xpt_lock);
}

/*
 * Remove a dead transport
 *
 * Only the first caller to set XPT_DEAD does the work; later callers
 * return immediately.
 */
static void svc_delete_xprt(struct svc_xprt *xprt)
{
	struct svc_serv	*serv = xprt->xpt_server;
	struct svc_deferred_req *dr;

	if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
		return;

	trace_svc_xprt_detach(xprt);
	xprt->xpt_ops->xpo_detach(xprt);
	if (xprt->xpt_bc_xprt)
		xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);

	spin_lock_bh(&serv->sv_lock);
	list_del_init(&xprt->xpt_list);
	if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
	    !test_bit(XPT_PEER_VALID, &xprt->xpt_flags))
		serv->sv_tmpcnt--;
	spin_unlock_bh(&serv->sv_lock);

	/* Discard any requests still deferred on this transport */
	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
		free_deferred(xprt, dr);

	call_xpt_users(xprt);
	svc_xprt_put(xprt);
}

/**
 * svc_xprt_close - Close a client connection
 * @xprt: transport to disconnect
 *
 */
void svc_xprt_close(struct svc_xprt *xprt)
{
	trace_svc_xprt_close(xprt);
	set_bit(XPT_CLOSE, &xprt->xpt_flags);
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		/* someone else will have to effect the close */
		return;
	/*
	 * We expect svc_xprt_close() to work even when no threads are
	 * running (e.g., while configuring the server before starting
	 * any threads), so if the transport isn't busy, we delete
	 * it ourselves:
	 */
	svc_delete_xprt(xprt);
}
EXPORT_SYMBOL_GPL(svc_xprt_close);

/*
 * Flag every transport on @xprt_list that belongs to @net for closing
 * and try to queue it. Returns the number of transports found.
 */
static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
	struct svc_xprt *xprt;
	int ret = 0;

	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(xprt, xprt_list, xpt_list) {
		if (xprt->xpt_net != net)
			continue;
		ret++;
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		svc_xprt_enqueue(xprt);
	}
	spin_unlock_bh(&serv->sv_lock);
	return ret;
}

/*
 * Take every queued transport off each pool's ready queue, delete
 * those belonging to @net, and put the remainder back.
 */
static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
{
	struct svc_xprt *xprt;
	int i;

	for (i = 0; i < serv->sv_nrpools; i++) {
		struct svc_pool *pool = &serv->sv_pools[i];
		struct llist_node *q, **t1, *t2;

		q = lwq_dequeue_all(&pool->sp_xprts);
		lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) {
			if (xprt->xpt_net == net) {
				set_bit(XPT_CLOSE, &xprt->xpt_flags);
				svc_delete_xprt(xprt);
				/*
				 * NOTE(review): presumably tells
				 * lwq_for_each_safe() to drop this entry
				 * from q - confirm against the macro.
				 */
				xprt = NULL;
			}
		}

		if (q)
			lwq_enqueue_batch(q, &pool->sp_xprts);
	}
}

/**
 * svc_xprt_destroy_all - Destroy transports associated with @serv
 * @serv: RPC service to be shut down
 * @net: target network namespace
 *
 * Server threads may still be running (especially in the case where the
 * service is still running in other network namespaces).
 *
 * So we shut down sockets the same way we would on a running server, by
 * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do
 * the close.  In the case where there are no other threads running,
 * svc_clean_up_xprts() does a simple version of a
 * server's main event loop, and in the case where there are other
 * threads, we may need to wait a little while and then check again to
 * see if they're done.
*/
void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
{
	int delay = 0;

	/*
	 * Repeat until neither list holds a transport for @net, sleeping
	 * one millisecond longer on each pass.
	 */
	while (svc_close_list(serv, &serv->sv_permsocks, net) +
	       svc_close_list(serv, &serv->sv_tempsocks, net)) {

		svc_clean_up_xprts(serv, net);
		msleep(delay++);
	}
}
EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);

/*
 * Handle defer and revisit of requests
 */

/*
 * Revisit callback for a deferred request (installed by svc_defer()).
 * The request is either dropped (@too_many set, or the transport is
 * dead) or queued on the transport for replay. The transport reference
 * taken in svc_defer() is released on both paths.
 */
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
{
	struct svc_deferred_req *dr =
		container_of(dreq, struct svc_deferred_req, handle);
	struct svc_xprt *xprt = dr->xprt;

	spin_lock(&xprt->xpt_lock);
	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
		spin_unlock(&xprt->xpt_lock);
		trace_svc_defer_drop(dr);
		free_deferred(xprt, dr);
		svc_xprt_put(xprt);
		return;
	}
	dr->xprt = NULL;
	list_add(&dr->handle.recent, &xprt->xpt_deferred);
	spin_unlock(&xprt->xpt_lock);
	trace_svc_defer_queue(dr);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
}

/*
 * Save the request off for later processing. The request buffer looks
 * like this:
 *
 * <xprt-header><rpc-header><rpc-pagelist><rpc-tail>
 *
 * This code can only handle requests that consist of an xprt-header
 * and rpc-header.
*/
static struct cache_deferred_req *svc_defer(struct cache_req *req)
{
	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
	struct svc_deferred_req *dr;

	if (rqstp->rq_arg.page_len || !test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags))
		return NULL; /* if more than a page, give up FIXME */
	if (rqstp->rq_deferred) {
		/* Already deferred once: reuse the saved copy */
		dr = rqstp->rq_deferred;
		rqstp->rq_deferred = NULL;
	} else {
		size_t skip;
		size_t size;
		/* FIXME maybe discard if size too large */
		size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
		dr = kmalloc(size, GFP_KERNEL);
		if (dr == NULL)
			return NULL;

		dr->handle.owner = rqstp->rq_server;
		dr->prot = rqstp->rq_prot;
		memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
		dr->addrlen = rqstp->rq_addrlen;
		dr->daddr = rqstp->rq_daddr;
		/* argslen counts 4-byte words */
		dr->argslen = rqstp->rq_arg.len >> 2;

		/* back up head to the start of the buffer and copy */
		skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
		memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
		       dr->argslen << 2);
	}
	/* The transport context moves from the request to the deferral */
	dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
	rqstp->rq_xprt_ctxt = NULL;
	trace_svc_defer(rqstp);
	/* This reference is dropped in svc_revisit() */
	svc_xprt_get(rqstp->rq_xprt);
	dr->xprt = rqstp->rq_xprt;
	set_bit(RQ_DROPME, &rqstp->rq_flags);

	dr->handle.revisit = svc_revisit;
	return &dr->handle;
}

/*
 * recv data from a deferred request into an active one
 *
 * Returns the length in bytes of the restored request.
 */
static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
	struct svc_deferred_req *dr = rqstp->rq_deferred;

	trace_svc_defer_recv(dr);

	/* setup iov_base past transport header */
	rqstp->rq_arg.head[0].iov_base = dr->args;
	/* The iov_len does not include the transport header bytes */
	rqstp->rq_arg.head[0].iov_len = dr->argslen << 2;
	rqstp->rq_arg.page_len = 0;
	/* The rq_arg.len includes the transport header bytes */
	rqstp->rq_arg.len     = dr->argslen << 2;
	rqstp->rq_prot        = dr->prot;
	memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
	rqstp->rq_addrlen     = dr->addrlen;
	/*
	 * Save off transport header len in case we get deferred again
	 *
	 * NOTE(review): no header length is stored here any more, and
	 * iov_len and rq_arg.len above are set to the same value; the
	 * three "transport header" comments look stale - confirm.
	 */
	rqstp->rq_daddr       = dr->daddr;
	rqstp->rq_respages    = rqstp->rq_pages;
	/* Hand the saved transport context back to the request */
	rqstp->rq_xprt_ctxt   = dr->xprt_ctxt;

	dr->xprt_ctxt = NULL;
	svc_xprt_received(rqstp->rq_xprt);
	return dr->argslen << 2;
}

/*
 * Take the first deferred request off @xprt, if any. XPT_DEFERRED is
 * cleared when the list is found to be empty.
 */
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
{
	struct svc_deferred_req *dr = NULL;

	/* Cheap unlocked check first */
	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
		return NULL;
	spin_lock(&xprt->xpt_lock);
	if (!list_empty(&xprt->xpt_deferred)) {
		dr = list_entry(xprt->xpt_deferred.next,
				struct svc_deferred_req,
				handle.recent);
		list_del_init(&dr->handle.recent);
	} else
		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
	spin_unlock(&xprt->xpt_lock);
	return dr;
}

/**
 * svc_find_listener - find an RPC transport instance
 * @serv: pointer to svc_serv to search
 * @xcl_name: C string containing transport's class name
 * @net: owner net pointer
 * @sa: sockaddr containing address
 *
 * Return the transport instance pointer for the endpoint accepting
 * connections/peer traffic from the specified transport class,
 * and matching sockaddr. A reference is taken on the returned
 * transport. Returns NULL if no permanent transport matches.
 */
struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name,
				   struct net *net, const struct sockaddr *sa)
{
	struct svc_xprt *xprt;
	struct svc_xprt *found = NULL;

	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
		if (xprt->xpt_net != net)
			continue;
		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
			continue;
		if (!rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local))
			continue;
		found = xprt;
		svc_xprt_get(xprt);
		break;
	}
	spin_unlock_bh(&serv->sv_lock);
	return found;
}
EXPORT_SYMBOL_GPL(svc_find_listener);

/**
 * svc_find_xprt - find an RPC transport instance
 * @serv: pointer to svc_serv to search
 * @xcl_name: C string containing transport's class name
 * @net: owner net pointer
 * @af: Address family of transport's local address
 * @port: transport's IP port number
 *
 * Return the transport instance pointer for the endpoint accepting
 * connections/peer traffic from the specified transport class,
 * address family and port.
* * Specifying 0 for the address family or port is effectively a * wild-card, and will result in matching the first transport in the * service's list that has a matching class name. */ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, struct net *net, const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; /* Sanity check the args */ if (serv == NULL || xcl_name == NULL) return found; spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { if (xprt->xpt_net != net) continue; if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) continue; if (port != 0 && port != svc_xprt_local_port(xprt)) continue; found = xprt; svc_xprt_get(xprt); break; } spin_unlock_bh(&serv->sv_lock); return found; } EXPORT_SYMBOL_GPL(svc_find_xprt); static int svc_one_xprt_name(const struct svc_xprt *xprt, char *pos, int remaining) { int len; len = snprintf(pos, remaining, "%s %u\n", xprt->xpt_class->xcl_name, svc_xprt_local_port(xprt)); if (len >= remaining) return -ENAMETOOLONG; return len; } /** * svc_xprt_names - format a buffer with a list of transport names * @serv: pointer to an RPC service * @buf: pointer to a buffer to be filled in * @buflen: length of buffer to be filled in * * Fills in @buf with a string containing a list of transport names, * each name terminated with '\n'. * * Returns positive length of the filled-in string on success; otherwise * a negative errno value is returned if an error occurs. 
*/ int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; int len, totlen; char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); pos = buf; totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { len = svc_one_xprt_name(xprt, pos, buflen - totlen); if (len < 0) { *buf = '\0'; totlen = len; } if (len <= 0) break; pos += len; totlen += len; } spin_unlock_bh(&serv->sv_lock); return totlen; } EXPORT_SYMBOL_GPL(svc_xprt_names); /*----------------------------------------------------------------------------*/ static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) { unsigned int pidx = (unsigned int)*pos; struct svc_info *si = m->private; dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); mutex_lock(si->mutex); if (!pidx) return SEQ_START_TOKEN; if (!si->serv) return NULL; return pidx > si->serv->sv_nrpools ? NULL : &si->serv->sv_pools[pidx - 1]; } static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) { struct svc_pool *pool = p; struct svc_info *si = m->private; struct svc_serv *serv = si->serv; dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); if (!serv) { pool = NULL; } else if (p == SEQ_START_TOKEN) { pool = &serv->sv_pools[0]; } else { unsigned int pidx = (pool - &serv->sv_pools[0]); if (pidx < serv->sv_nrpools-1) pool = &serv->sv_pools[pidx+1]; else pool = NULL; } ++*pos; return pool; } static void svc_pool_stats_stop(struct seq_file *m, void *p) { struct svc_info *si = m->private; mutex_unlock(si->mutex); } static int svc_pool_stats_show(struct seq_file *m, void *p) { struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); return 0; } seq_printf(m, "%u %llu %llu %llu 0\n", pool->sp_id, percpu_counter_sum_positive(&pool->sp_messages_arrived), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken)); return 0; 
} static const struct seq_operations svc_pool_stats_seq_ops = { .start = svc_pool_stats_start, .next = svc_pool_stats_next, .stop = svc_pool_stats_stop, .show = svc_pool_stats_show, }; int svc_pool_stats_open(struct svc_info *info, struct file *file) { struct seq_file *seq; int err; err = seq_open(file, &svc_pool_stats_seq_ops); if (err) return err; seq = file->private_data; seq->private = info; return 0; } EXPORT_SYMBOL(svc_pool_stats_open); /*----------------------------------------------------------------------------*/
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 /* * Atheros CARL9170 driver * * USB - frontend * * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> * Copyright 2009, 2010, Christian Lamparter <chunkeey@googlemail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, see * http://www.gnu.org/licenses/. 
* * This file incorporates work covered by the following copyright and * permission notice: * Copyright (c) 2007-2008 Atheros Communications, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/firmware.h> #include <linux/etherdevice.h> #include <linux/device.h> #include <net/mac80211.h> #include "carl9170.h" #include "cmd.h" #include "hw.h" #include "fwcmd.h" MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); MODULE_AUTHOR("Christian Lamparter <chunkeey@googlemail.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Atheros AR9170 802.11n USB wireless"); MODULE_FIRMWARE(CARL9170FW_NAME); MODULE_ALIAS("ar9170usb"); MODULE_ALIAS("arusb_lnx"); /* * Note: * * Always update our wiki's device list (located at: * https://wireless.wiki.kernel.org/en/users/Drivers/ar9170/devices ), * whenever you add a new device. 
*/ static const struct usb_device_id carl9170_usb_ids[] = { /* Atheros 9170 */ { USB_DEVICE(0x0cf3, 0x9170) }, /* Atheros TG121N */ { USB_DEVICE(0x0cf3, 0x1001) }, /* TP-Link TL-WN821N v2 */ { USB_DEVICE(0x0cf3, 0x1002), .driver_info = CARL9170_WPS_BUTTON | CARL9170_ONE_LED }, /* 3Com Dual Band 802.11n USB Adapter */ { USB_DEVICE(0x0cf3, 0x1010) }, /* H3C Dual Band 802.11n USB Adapter */ { USB_DEVICE(0x0cf3, 0x1011) }, /* Cace Airpcap NX */ { USB_DEVICE(0xcace, 0x0300) }, /* D-Link DWA 160 A1 */ { USB_DEVICE(0x07d1, 0x3c10) }, /* D-Link DWA 160 A2 */ { USB_DEVICE(0x07d1, 0x3a09) }, /* D-Link DWA 130 D */ { USB_DEVICE(0x07d1, 0x3a0f) }, /* Netgear WNA1000 */ { USB_DEVICE(0x0846, 0x9040) }, /* Netgear WNDA3100 (v1) */ { USB_DEVICE(0x0846, 0x9010) }, /* Netgear WN111 v2 */ { USB_DEVICE(0x0846, 0x9001), .driver_info = CARL9170_ONE_LED }, /* Zydas ZD1221 */ { USB_DEVICE(0x0ace, 0x1221) }, /* Proxim ORiNOCO 802.11n USB */ { USB_DEVICE(0x1435, 0x0804) }, /* WNC Generic 11n USB Dongle */ { USB_DEVICE(0x1435, 0x0326) }, /* ZyXEL NWD271N */ { USB_DEVICE(0x0586, 0x3417) }, /* Z-Com UB81 BG */ { USB_DEVICE(0x0cde, 0x0023) }, /* Z-Com UB82 ABG */ { USB_DEVICE(0x0cde, 0x0026) }, /* Sphairon Homelink 1202 */ { USB_DEVICE(0x0cde, 0x0027) }, /* Arcadyan WN7512 */ { USB_DEVICE(0x083a, 0xf522) }, /* Planex GWUS300 */ { USB_DEVICE(0x2019, 0x5304) }, /* IO-Data WNGDNUS2 */ { USB_DEVICE(0x04bb, 0x093f) }, /* NEC WL300NU-G */ { USB_DEVICE(0x0409, 0x0249) }, /* NEC WL300NU-AG */ { USB_DEVICE(0x0409, 0x02b4) }, /* AVM FRITZ!WLAN USB Stick N */ { USB_DEVICE(0x057c, 0x8401) }, /* AVM FRITZ!WLAN USB Stick N 2.4 */ { USB_DEVICE(0x057c, 0x8402) }, /* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */ { USB_DEVICE(0x1668, 0x1200) }, /* Airlive X.USB a/b/g/n */ { USB_DEVICE(0x1b75, 0x9170) }, /* terminate */ {} }; MODULE_DEVICE_TABLE(usb, carl9170_usb_ids); static struct usb_driver carl9170_driver; static void carl9170_usb_submit_data_urb(struct ar9170 *ar) { struct urb *urb; int err; if 
(atomic_inc_return(&ar->tx_anch_urbs) > AR9170_NUM_TX_URBS) goto err_acc; urb = usb_get_from_anchor(&ar->tx_wait); if (!urb) goto err_acc; usb_anchor_urb(urb, &ar->tx_anch); err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err)) { if (net_ratelimit()) { dev_err(&ar->udev->dev, "tx submit failed (%d)\n", urb->status); } usb_unanchor_urb(urb); usb_anchor_urb(urb, &ar->tx_err); } usb_free_urb(urb); if (likely(err == 0)) return; err_acc: atomic_dec(&ar->tx_anch_urbs); } static void carl9170_usb_tx_data_complete(struct urb *urb) { struct ar9170 *ar = usb_get_intfdata(usb_ifnum_to_if(urb->dev, 0)); if (WARN_ON_ONCE(!ar)) { dev_kfree_skb_irq(urb->context); return; } atomic_dec(&ar->tx_anch_urbs); switch (urb->status) { /* everything is fine */ case 0: carl9170_tx_callback(ar, urb->context); break; /* disconnect */ case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: /* * Defer the frame clean-up to the tasklet worker. * This is necessary, because carl9170_tx_drop * does not work in an irqsave context. */ usb_anchor_urb(urb, &ar->tx_err); return; /* a random transmission error has occurred? 
*/ default: if (net_ratelimit()) { dev_err(&ar->udev->dev, "tx failed (%d)\n", urb->status); } usb_anchor_urb(urb, &ar->tx_err); break; } if (likely(IS_STARTED(ar))) carl9170_usb_submit_data_urb(ar); } static int carl9170_usb_submit_cmd_urb(struct ar9170 *ar) { struct urb *urb; int err; if (atomic_inc_return(&ar->tx_cmd_urbs) != 1) { atomic_dec(&ar->tx_cmd_urbs); return 0; } urb = usb_get_from_anchor(&ar->tx_cmd); if (!urb) { atomic_dec(&ar->tx_cmd_urbs); return 0; } usb_anchor_urb(urb, &ar->tx_anch); err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err)) { usb_unanchor_urb(urb); atomic_dec(&ar->tx_cmd_urbs); } usb_free_urb(urb); return err; } static void carl9170_usb_cmd_complete(struct urb *urb) { struct ar9170 *ar = urb->context; int err = 0; if (WARN_ON_ONCE(!ar)) return; atomic_dec(&ar->tx_cmd_urbs); switch (urb->status) { /* everything is fine */ case 0: break; /* disconnect */ case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: return; default: err = urb->status; break; } if (!IS_INITIALIZED(ar)) return; if (err) dev_err(&ar->udev->dev, "submit cmd cb failed (%d).\n", err); err = carl9170_usb_submit_cmd_urb(ar); if (err) dev_err(&ar->udev->dev, "submit cmd failed (%d).\n", err); } static void carl9170_usb_rx_irq_complete(struct urb *urb) { struct ar9170 *ar = urb->context; if (WARN_ON_ONCE(!ar)) return; switch (urb->status) { /* everything is fine */ case 0: break; /* disconnect */ case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: return; default: goto resubmit; } /* * While the carl9170 firmware does not use this EP, the * firmware loader in the EEPROM unfortunately does. * Therefore we need to be ready to handle out-of-band * responses and traps in case the firmware crashed and * the loader took over again. 
*/ carl9170_handle_command_response(ar, urb->transfer_buffer, urb->actual_length); resubmit: usb_anchor_urb(urb, &ar->rx_anch); if (unlikely(usb_submit_urb(urb, GFP_ATOMIC))) usb_unanchor_urb(urb); } static int carl9170_usb_submit_rx_urb(struct ar9170 *ar, gfp_t gfp) { struct urb *urb; int err = 0, runs = 0; while ((atomic_read(&ar->rx_anch_urbs) < AR9170_NUM_RX_URBS) && (runs++ < AR9170_NUM_RX_URBS)) { err = -ENOSPC; urb = usb_get_from_anchor(&ar->rx_pool); if (urb) { usb_anchor_urb(urb, &ar->rx_anch); err = usb_submit_urb(urb, gfp); if (unlikely(err)) { usb_unanchor_urb(urb); usb_anchor_urb(urb, &ar->rx_pool); } else { atomic_dec(&ar->rx_pool_urbs); atomic_inc(&ar->rx_anch_urbs); } usb_free_urb(urb); } } return err; } static void carl9170_usb_rx_work(struct ar9170 *ar) { struct urb *urb; int i; for (i = 0; i < AR9170_NUM_RX_URBS_POOL; i++) { urb = usb_get_from_anchor(&ar->rx_work); if (!urb) break; atomic_dec(&ar->rx_work_urbs); if (IS_INITIALIZED(ar)) { carl9170_rx(ar, urb->transfer_buffer, urb->actual_length); } usb_anchor_urb(urb, &ar->rx_pool); atomic_inc(&ar->rx_pool_urbs); usb_free_urb(urb); carl9170_usb_submit_rx_urb(ar, GFP_ATOMIC); } } void carl9170_usb_handle_tx_err(struct ar9170 *ar) { struct urb *urb; while ((urb = usb_get_from_anchor(&ar->tx_err))) { struct sk_buff *skb = urb->context; carl9170_tx_drop(ar, skb); carl9170_tx_callback(ar, skb); usb_free_urb(urb); } } static void carl9170_usb_tasklet(struct tasklet_struct *t) { struct ar9170 *ar = from_tasklet(ar, t, usb_tasklet); if (!IS_INITIALIZED(ar)) return; carl9170_usb_rx_work(ar); /* * Strictly speaking: The tx scheduler is not part of the USB system. * But the rx worker returns frames back to the mac80211-stack and * this is the _perfect_ place to generate the next transmissions. 
*/ if (IS_STARTED(ar)) carl9170_tx_scheduler(ar); } static void carl9170_usb_rx_complete(struct urb *urb) { struct ar9170 *ar = urb->context; int err; if (WARN_ON_ONCE(!ar)) return; atomic_dec(&ar->rx_anch_urbs); switch (urb->status) { case 0: /* rx path */ usb_anchor_urb(urb, &ar->rx_work); atomic_inc(&ar->rx_work_urbs); break; case -ENOENT: case -ECONNRESET: case -ENODEV: case -ESHUTDOWN: /* handle disconnect events*/ return; default: /* handle all other errors */ usb_anchor_urb(urb, &ar->rx_pool); atomic_inc(&ar->rx_pool_urbs); break; } err = carl9170_usb_submit_rx_urb(ar, GFP_ATOMIC); if (unlikely(err)) { /* * usb_submit_rx_urb reported a problem. * In case this is due to a rx buffer shortage, * elevate the tasklet worker priority to * the highest available level. */ tasklet_hi_schedule(&ar->usb_tasklet); if (atomic_read(&ar->rx_anch_urbs) == 0) { /* * At this point, either the system is too slow to * cope with the enormous workload (so we have simply * run out of active rx urbs and this unfortunately * leads to an unpredictable device), or the device * is not fully functional after an unsuccessful * firmware loading attempts (so it doesn't pass * ieee80211_register_hw() and there is no internal * workqueue at all). */ if (ar->registered) ieee80211_queue_work(ar->hw, &ar->ping_work); else pr_warn_once("device %s is not registered\n", dev_name(&ar->udev->dev)); } } else { /* * Using anything less than _high_ priority absolutely * kills the rx performance my UP-System... 
*/ tasklet_hi_schedule(&ar->usb_tasklet); } } static struct urb *carl9170_usb_alloc_rx_urb(struct ar9170 *ar, gfp_t gfp) { struct urb *urb; void *buf; buf = kmalloc(ar->fw.rx_size, gfp); if (!buf) return NULL; urb = usb_alloc_urb(0, gfp); if (!urb) { kfree(buf); return NULL; } usb_fill_bulk_urb(urb, ar->udev, usb_rcvbulkpipe(ar->udev, AR9170_USB_EP_RX), buf, ar->fw.rx_size, carl9170_usb_rx_complete, ar); urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static int carl9170_usb_send_rx_irq_urb(struct ar9170 *ar) { struct urb *urb = NULL; void *ibuf; int err = -ENOMEM; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto out; ibuf = kmalloc(AR9170_USB_EP_CTRL_MAX, GFP_KERNEL); if (!ibuf) goto out; usb_fill_int_urb(urb, ar->udev, usb_rcvintpipe(ar->udev, AR9170_USB_EP_IRQ), ibuf, AR9170_USB_EP_CTRL_MAX, carl9170_usb_rx_irq_complete, ar, 1); urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &ar->rx_anch); err = usb_submit_urb(urb, GFP_KERNEL); if (err) usb_unanchor_urb(urb); out: usb_free_urb(urb); return err; } static int carl9170_usb_init_rx_bulk_urbs(struct ar9170 *ar) { struct urb *urb; int i, err = -EINVAL; /* * The driver actively maintains a second shadow * pool for inactive, but fully-prepared rx urbs. * * The pool should help the driver to master huge * workload spikes without running the risk of * undersupplying the hardware or wasting time by * processing rx data (streams) inside the urb * completion (hardirq context). */ for (i = 0; i < AR9170_NUM_RX_URBS_POOL; i++) { urb = carl9170_usb_alloc_rx_urb(ar, GFP_KERNEL); if (!urb) { err = -ENOMEM; goto err_out; } usb_anchor_urb(urb, &ar->rx_pool); atomic_inc(&ar->rx_pool_urbs); usb_free_urb(urb); } err = carl9170_usb_submit_rx_urb(ar, GFP_KERNEL); if (err) goto err_out; /* the device now waiting for the firmware. 
*/ carl9170_set_state_when(ar, CARL9170_STOPPED, CARL9170_IDLE); return 0; err_out: usb_scuttle_anchored_urbs(&ar->rx_pool); usb_scuttle_anchored_urbs(&ar->rx_work); usb_kill_anchored_urbs(&ar->rx_anch); return err; } static int carl9170_usb_flush(struct ar9170 *ar) { struct urb *urb; int ret, err = 0; while ((urb = usb_get_from_anchor(&ar->tx_wait))) { struct sk_buff *skb = urb->context; carl9170_tx_drop(ar, skb); carl9170_tx_callback(ar, skb); usb_free_urb(urb); } ret = usb_wait_anchor_empty_timeout(&ar->tx_cmd, 1000); if (ret == 0) err = -ETIMEDOUT; /* lets wait a while until the tx - queues are dried out */ ret = usb_wait_anchor_empty_timeout(&ar->tx_anch, 1000); if (ret == 0) err = -ETIMEDOUT; usb_kill_anchored_urbs(&ar->tx_anch); carl9170_usb_handle_tx_err(ar); return err; } static void carl9170_usb_cancel_urbs(struct ar9170 *ar) { int err; carl9170_set_state(ar, CARL9170_UNKNOWN_STATE); err = carl9170_usb_flush(ar); if (err) dev_err(&ar->udev->dev, "stuck tx urbs!\n"); usb_poison_anchored_urbs(&ar->tx_anch); carl9170_usb_handle_tx_err(ar); usb_poison_anchored_urbs(&ar->rx_anch); tasklet_kill(&ar->usb_tasklet); usb_scuttle_anchored_urbs(&ar->rx_work); usb_scuttle_anchored_urbs(&ar->rx_pool); usb_scuttle_anchored_urbs(&ar->tx_cmd); } int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd, const bool free_buf) { struct urb *urb; int err = 0; if (!IS_INITIALIZED(ar)) { err = -EPERM; goto err_free; } if (WARN_ON(cmd->hdr.len > CARL9170_MAX_CMD_LEN - 4)) { err = -EINVAL; goto err_free; } urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { err = -ENOMEM; goto err_free; } if (ar->usb_ep_cmd_is_bulk) usb_fill_bulk_urb(urb, ar->udev, usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4, carl9170_usb_cmd_complete, ar); else usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4, carl9170_usb_cmd_complete, ar, 1); if (free_buf) urb->transfer_flags |= URB_FREE_BUFFER; usb_anchor_urb(urb, &ar->tx_cmd); 
usb_free_urb(urb); return carl9170_usb_submit_cmd_urb(ar); err_free: if (free_buf) kfree(cmd); return err; } int carl9170_exec_cmd(struct ar9170 *ar, const enum carl9170_cmd_oids cmd, unsigned int plen, void *payload, unsigned int outlen, void *out) { int err = -ENOMEM; unsigned long time_left; if (!IS_ACCEPTING_CMD(ar)) return -EIO; if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) might_sleep(); ar->cmd.hdr.len = plen; ar->cmd.hdr.cmd = cmd; /* writing multiple regs fills this buffer already */ if (plen && payload != (u8 *)(ar->cmd.data)) memcpy(ar->cmd.data, payload, plen); spin_lock_bh(&ar->cmd_lock); ar->readbuf = out; ar->readlen = outlen; spin_unlock_bh(&ar->cmd_lock); reinit_completion(&ar->cmd_wait); err = __carl9170_exec_cmd(ar, &ar->cmd, false); if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) { time_left = wait_for_completion_timeout(&ar->cmd_wait, HZ); if (time_left == 0) { err = -ETIMEDOUT; goto err_unbuf; } if (ar->readlen != outlen) { err = -EMSGSIZE; goto err_unbuf; } } return 0; err_unbuf: /* Maybe the device was removed in the moment we were waiting? 
*/ if (IS_STARTED(ar)) { dev_err(&ar->udev->dev, "no command feedback " "received (%d).\n", err); /* provide some maybe useful debug information */ print_hex_dump_bytes("carl9170 cmd: ", DUMP_PREFIX_NONE, &ar->cmd, plen + 4); carl9170_restart(ar, CARL9170_RR_COMMAND_TIMEOUT); } /* invalidate to avoid completing the next command prematurely */ spin_lock_bh(&ar->cmd_lock); ar->readbuf = NULL; ar->readlen = 0; spin_unlock_bh(&ar->cmd_lock); return err; } void carl9170_usb_tx(struct ar9170 *ar, struct sk_buff *skb) { struct urb *urb; struct ar9170_stream *tx_stream; void *data; unsigned int len; if (!IS_STARTED(ar)) goto err_drop; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) goto err_drop; if (ar->fw.tx_stream) { tx_stream = (void *) (skb->data - sizeof(*tx_stream)); len = skb->len + sizeof(*tx_stream); tx_stream->length = cpu_to_le16(len); tx_stream->tag = cpu_to_le16(AR9170_TX_STREAM_TAG); data = tx_stream; } else { data = skb->data; len = skb->len; } usb_fill_bulk_urb(urb, ar->udev, usb_sndbulkpipe(ar->udev, AR9170_USB_EP_TX), data, len, carl9170_usb_tx_data_complete, skb); urb->transfer_flags |= URB_ZERO_PACKET; usb_anchor_urb(urb, &ar->tx_wait); usb_free_urb(urb); carl9170_usb_submit_data_urb(ar); return; err_drop: carl9170_tx_drop(ar, skb); carl9170_tx_callback(ar, skb); } static void carl9170_release_firmware(struct ar9170 *ar) { if (ar->fw.fw) { release_firmware(ar->fw.fw); memset(&ar->fw, 0, sizeof(ar->fw)); } } void carl9170_usb_stop(struct ar9170 *ar) { int ret; carl9170_set_state_when(ar, CARL9170_IDLE, CARL9170_STOPPED); ret = carl9170_usb_flush(ar); if (ret) dev_err(&ar->udev->dev, "kill pending tx urbs.\n"); usb_poison_anchored_urbs(&ar->tx_anch); carl9170_usb_handle_tx_err(ar); /* kill any pending command */ spin_lock_bh(&ar->cmd_lock); ar->readlen = 0; spin_unlock_bh(&ar->cmd_lock); complete(&ar->cmd_wait); /* * Note: * So far we freed all tx urbs, but we won't dare to touch any rx urbs. * Else we would end up with a unresponsive device... 
*/ } int carl9170_usb_open(struct ar9170 *ar) { usb_unpoison_anchored_urbs(&ar->tx_anch); carl9170_set_state_when(ar, CARL9170_STOPPED, CARL9170_IDLE); return 0; } static int carl9170_usb_load_firmware(struct ar9170 *ar) { const u8 *data; u8 *buf; unsigned int transfer; size_t len; u32 addr; int err = 0; buf = kmalloc(4096, GFP_KERNEL); if (!buf) { err = -ENOMEM; goto err_out; } data = ar->fw.fw->data; len = ar->fw.fw->size; addr = ar->fw.address; /* this removes the miniboot image */ data += ar->fw.offset; len -= ar->fw.offset; while (len) { transfer = min_t(unsigned int, len, 4096u); memcpy(buf, data, transfer); err = usb_control_msg(ar->udev, usb_sndctrlpipe(ar->udev, 0), 0x30 /* FW DL */, 0x40 | USB_DIR_OUT, addr >> 8, 0, buf, transfer, 100); if (err < 0) { kfree(buf); goto err_out; } len -= transfer; data += transfer; addr += transfer; } kfree(buf); err = usb_control_msg(ar->udev, usb_sndctrlpipe(ar->udev, 0), 0x31 /* FW DL COMPLETE */, 0x40 | USB_DIR_OUT, 0, 0, NULL, 0, 200); if (wait_for_completion_timeout(&ar->fw_boot_wait, HZ) == 0) { err = -ETIMEDOUT; goto err_out; } err = carl9170_echo_test(ar, 0x4a110123); if (err) goto err_out; /* now, start the command response counter */ ar->cmd_seq = -1; return 0; err_out: dev_err(&ar->udev->dev, "firmware upload failed (%d).\n", err); return err; } int carl9170_usb_restart(struct ar9170 *ar) { int err = 0; if (ar->intf->condition != USB_INTERFACE_BOUND) return 0; /* * Disable the command response sequence counter check. * We already know that the device/firmware is in a bad state. * So, no extra points are awarded to anyone who reminds the * driver about that. */ ar->cmd_seq = -2; err = carl9170_reboot(ar); carl9170_usb_stop(ar); if (err) goto err_out; tasklet_schedule(&ar->usb_tasklet); /* The reboot procedure can take quite a while to complete. 
*/ msleep(1100); err = carl9170_usb_open(ar); if (err) goto err_out; err = carl9170_usb_load_firmware(ar); if (err) goto err_out; return 0; err_out: carl9170_usb_cancel_urbs(ar); return err; } void carl9170_usb_reset(struct ar9170 *ar) { /* * This is the last resort to get the device going again * without any *user replugging action*. * * But there is a catch: usb_reset really is like a physical * *reconnect*. The mac80211 state will be lost in the process. * Therefore a userspace application, which is monitoring * the link must step in. */ carl9170_usb_cancel_urbs(ar); carl9170_usb_stop(ar); usb_queue_reset_device(ar->intf); } static int carl9170_usb_init_device(struct ar9170 *ar) { int err; /* * The carl9170 firmware let's the driver know when it's * ready for action. But we have to be prepared to gracefully * handle all spurious [flushed] messages after each (re-)boot. * Thus the command response counter remains disabled until it * can be safely synchronized. */ ar->cmd_seq = -2; err = carl9170_usb_send_rx_irq_urb(ar); if (err) goto err_out; err = carl9170_usb_init_rx_bulk_urbs(ar); if (err) goto err_unrx; err = carl9170_usb_open(ar); if (err) goto err_unrx; mutex_lock(&ar->mutex); err = carl9170_usb_load_firmware(ar); mutex_unlock(&ar->mutex); if (err) goto err_stop; return 0; err_stop: carl9170_usb_stop(ar); err_unrx: carl9170_usb_cancel_urbs(ar); err_out: return err; } static void carl9170_usb_firmware_failed(struct ar9170 *ar) { /* Store a copies of the usb_interface and usb_device pointer locally. * This is because release_driver initiates carl9170_usb_disconnect, * which in turn frees our driver context (ar). */ struct usb_interface *intf = ar->intf; struct usb_device *udev = ar->udev; complete(&ar->fw_load_wait); /* at this point 'ar' could be already freed. 
Don't use it anymore */ ar = NULL; /* unbind anything failed */ usb_lock_device(udev); usb_driver_release_interface(&carl9170_driver, intf); usb_unlock_device(udev); usb_put_intf(intf); } static void carl9170_usb_firmware_finish(struct ar9170 *ar) { struct usb_interface *intf = ar->intf; int err; err = carl9170_parse_firmware(ar); if (err) goto err_freefw; err = carl9170_usb_init_device(ar); if (err) goto err_freefw; err = carl9170_register(ar); carl9170_usb_stop(ar); if (err) goto err_unrx; complete(&ar->fw_load_wait); usb_put_intf(intf); return; err_unrx: carl9170_usb_cancel_urbs(ar); err_freefw: carl9170_release_firmware(ar); carl9170_usb_firmware_failed(ar); } static void carl9170_usb_firmware_step2(const struct firmware *fw, void *context) { struct ar9170 *ar = context; if (fw) { ar->fw.fw = fw; carl9170_usb_firmware_finish(ar); return; } dev_err(&ar->udev->dev, "firmware not found.\n"); carl9170_usb_firmware_failed(ar); } static int carl9170_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_endpoint_descriptor *ep; struct ar9170 *ar; struct usb_device *udev; int i, err; err = usb_reset_device(interface_to_usbdev(intf)); if (err) return err; ar = carl9170_alloc(sizeof(*ar)); if (IS_ERR(ar)) return PTR_ERR(ar); udev = interface_to_usbdev(intf); ar->udev = udev; ar->intf = intf; ar->features = id->driver_info; /* We need to remember the type of endpoint 4 because it differs * between high- and full-speed configuration. The high-speed * configuration specifies it as interrupt and the full-speed * configuration as bulk endpoint. This information is required * later when sending urbs to that endpoint. 
*/ for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) { ep = &intf->cur_altsetting->endpoint[i].desc; if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD && usb_endpoint_dir_out(ep) && usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK) ar->usb_ep_cmd_is_bulk = true; } /* Verify that all expected endpoints are present */ if (ar->usb_ep_cmd_is_bulk) { u8 bulk_ep_addr[] = { AR9170_USB_EP_RX | USB_DIR_IN, AR9170_USB_EP_TX | USB_DIR_OUT, AR9170_USB_EP_CMD | USB_DIR_OUT, 0}; u8 int_ep_addr[] = { AR9170_USB_EP_IRQ | USB_DIR_IN, 0}; if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || !usb_check_int_endpoints(intf, int_ep_addr)) err = -ENODEV; } else { u8 bulk_ep_addr[] = { AR9170_USB_EP_RX | USB_DIR_IN, AR9170_USB_EP_TX | USB_DIR_OUT, 0}; u8 int_ep_addr[] = { AR9170_USB_EP_IRQ | USB_DIR_IN, AR9170_USB_EP_CMD | USB_DIR_OUT, 0}; if (!usb_check_bulk_endpoints(intf, bulk_ep_addr) || !usb_check_int_endpoints(intf, int_ep_addr)) err = -ENODEV; } if (err) { carl9170_free(ar); return err; } usb_set_intfdata(intf, ar); SET_IEEE80211_DEV(ar->hw, &intf->dev); init_usb_anchor(&ar->rx_anch); init_usb_anchor(&ar->rx_pool); init_usb_anchor(&ar->rx_work); init_usb_anchor(&ar->tx_wait); init_usb_anchor(&ar->tx_anch); init_usb_anchor(&ar->tx_cmd); init_usb_anchor(&ar->tx_err); init_completion(&ar->cmd_wait); init_completion(&ar->fw_boot_wait); init_completion(&ar->fw_load_wait); tasklet_setup(&ar->usb_tasklet, carl9170_usb_tasklet); atomic_set(&ar->tx_cmd_urbs, 0); atomic_set(&ar->tx_anch_urbs, 0); atomic_set(&ar->rx_work_urbs, 0); atomic_set(&ar->rx_anch_urbs, 0); atomic_set(&ar->rx_pool_urbs, 0); usb_get_intf(intf); carl9170_set_state(ar, CARL9170_STOPPED); err = request_firmware_nowait(THIS_MODULE, 1, CARL9170FW_NAME, &ar->udev->dev, GFP_KERNEL, ar, carl9170_usb_firmware_step2); if (err) { usb_put_intf(intf); carl9170_free(ar); } return err; } static void carl9170_usb_disconnect(struct usb_interface *intf) { struct ar9170 *ar = usb_get_intfdata(intf); if (WARN_ON(!ar)) return; 
wait_for_completion(&ar->fw_load_wait); if (IS_INITIALIZED(ar)) { carl9170_reboot(ar); carl9170_usb_stop(ar); } carl9170_usb_cancel_urbs(ar); carl9170_unregister(ar); usb_set_intfdata(intf, NULL); carl9170_release_firmware(ar); carl9170_free(ar); } #ifdef CONFIG_PM static int carl9170_usb_suspend(struct usb_interface *intf, pm_message_t message) { struct ar9170 *ar = usb_get_intfdata(intf); if (!ar) return -ENODEV; carl9170_usb_cancel_urbs(ar); return 0; } static int carl9170_usb_resume(struct usb_interface *intf) { struct ar9170 *ar = usb_get_intfdata(intf); int err; if (!ar) return -ENODEV; usb_unpoison_anchored_urbs(&ar->rx_anch); carl9170_set_state(ar, CARL9170_STOPPED); /* * The USB documentation demands that [for suspend] all traffic * to and from the device has to stop. This would be fine, but * there's a catch: the device[usb phy] does not come back. * * Upon resume the firmware will "kill" itself and the * boot-code sorts out the magic voodoo. * Not very nice, but there's not much what could go wrong. */ msleep(1100); err = carl9170_usb_init_device(ar); if (err) goto err_unrx; return 0; err_unrx: carl9170_usb_cancel_urbs(ar); return err; } #endif /* CONFIG_PM */ static struct usb_driver carl9170_driver = { .name = KBUILD_MODNAME, .probe = carl9170_usb_probe, .disconnect = carl9170_usb_disconnect, .id_table = carl9170_usb_ids, .soft_unbind = 1, #ifdef CONFIG_PM .suspend = carl9170_usb_suspend, .resume = carl9170_usb_resume, .reset_resume = carl9170_usb_resume, #endif /* CONFIG_PM */ .disable_hub_initiated_lpm = 1, }; module_usb_driver(carl9170_driver);
340 341 341 341 341 9 329 164 65 164 128 48 126 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/types.h> #include <linux/ipv6.h> #include <linux/in6.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/icmp.h> #include <linux/rcupdate.h> #include <linux/sysctl.h> #include <net/ipv6_frag.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> #endif #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> static DEFINE_MUTEX(defrag6_mutex); static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { u16 zone_id = NF_CT_DEFAULT_ZONE_ID; #if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb_nfct(skb)) { enum ip_conntrack_info ctinfo; const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); } #endif if (nf_bridge_in_prerouting(skb)) return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id; if (hooknum == 
NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN + zone_id; else return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) return NF_ACCEPT; if (skb->_nfct == IP_CT_UNTRACKED) return NF_ACCEPT; #endif err = nf_ct_frag6_gather(state->net, skb, nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (err == -EINPROGRESS) return NF_STOLEN; return err == 0 ? NF_ACCEPT : NF_DROP; } static const struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, }; static void __net_exit defrag6_net_exit(struct net *net) { if (net->nf.defrag_ipv6_users) { nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); net->nf.defrag_ipv6_users = 0; } } static const struct nf_defrag_hook defrag_hook = { .owner = THIS_MODULE, .enable = nf_defrag_ipv6_enable, .disable = nf_defrag_ipv6_disable, }; static struct pernet_operations defrag6_net_ops = { .exit = defrag6_net_exit, }; static int __init nf_defrag_init(void) { int ret = 0; ret = nf_ct_frag6_init(); if (ret < 0) { pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); return ret; } ret = register_pernet_subsys(&defrag6_net_ops); if (ret < 0) { pr_err("nf_defrag_ipv6: can't register pernet ops\n"); goto cleanup_frag6; } rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook); return ret; cleanup_frag6: nf_ct_frag6_cleanup(); return ret; } static void __exit nf_defrag_fini(void) { rcu_assign_pointer(nf_defrag_v6_hook, NULL); unregister_pernet_subsys(&defrag6_net_ops); nf_ct_frag6_cleanup(); } int nf_defrag_ipv6_enable(struct net *net) { int err = 0; 
mutex_lock(&defrag6_mutex); if (net->nf.defrag_ipv6_users == UINT_MAX) { err = -EOVERFLOW; goto out_unlock; } if (net->nf.defrag_ipv6_users) { net->nf.defrag_ipv6_users++; goto out_unlock; } err = nf_register_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); if (err == 0) net->nf.defrag_ipv6_users = 1; out_unlock: mutex_unlock(&defrag6_mutex); return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); void nf_defrag_ipv6_disable(struct net *net) { mutex_lock(&defrag6_mutex); if (net->nf.defrag_ipv6_users) { net->nf.defrag_ipv6_users--; if (net->nf.defrag_ipv6_users == 0) nf_unregister_net_hooks(net, ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); } mutex_unlock(&defrag6_mutex); } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_disable); module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6 defragmentation support");
1 66 65 66 66 65 66 66 66 66 66 66 65 65 66 66 65 66 66 3 3 3 3 3 3 1 1 1 1 1 1 6 6 6 5 4 4 4 4 1 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 
1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 
2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 
2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 
3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 
3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 
3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 
4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 // SPDX-License-Identifier: GPL-2.0-only /* * sd.c Copyright (C) 1992 Drew Eckhardt * Copyright (C) 1993, 1994, 1995, 1999 Eric Youngdale * * Linux scsi disk driver * Initial versions: Drew Eckhardt * Subsequent revisions: Eric Youngdale * Modification history: * - Drew Eckhardt <drew@colorado.edu> original * - Eric Youngdale <eric@andante.org> add scatter-gather, multiple * outstanding request, and other enhancements. * Support loadable low-level scsi drivers. * - Jirka Hanika <geo@ff.cuni.cz> support more scsi disks using * eight major numbers. * - Richard Gooch <rgooch@atnf.csiro.au> support devfs. * - Torben Mathiasen <tmm@image.dk> Resource allocation fixes in * sd_init and cleanups. * - Alex Davis <letmein@erols.com> Fix problem where partition info * not being read in sd_open. 
Fix problem where removable media * could be ejected after sd_open. * - Douglas Gilbert <dgilbert@interlog.com> cleanup for lk 2.5.x * - Badari Pulavarty <pbadari@us.ibm.com>, Matthew Wilcox * <willy@debian.org>, Kurt Garloff <garloff@suse.de>: * Support 32k/1M disks. * * Logging policy (needs CONFIG_SCSI_LOGGING defined): * - setting up transfer: SCSI_LOG_HLQUEUE levels 1 and 2 * - end of transfer (bh + scsi_lib): SCSI_LOG_HLCOMPLETE level 1 * - entering sd_ioctl: SCSI_LOG_IOCTL level 1 * - entering other commands: SCSI_LOG_HLQUEUE level 3 * Note: when the logging level is set by the user, it must be greater * than the level indicated above to trigger output. */ #include <linux/bio-integrity.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/hdreg.h> #include <linux/errno.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/blkpg.h> #include <linux/blk-pm.h> #include <linux/delay.h> #include <linux/rw_hint.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/string_helpers.h> #include <linux/slab.h> #include <linux/sed-opal.h> #include <linux/pm_runtime.h> #include <linux/pr.h> #include <linux/t10-pi.h> #include <linux/uaccess.h> #include <linux/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_devinfo.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsicam.h> #include <scsi/scsi_common.h> #include "sd.h" #include "scsi_priv.h" #include "scsi_logging.h" MODULE_AUTHOR("Eric Youngdale"); MODULE_DESCRIPTION("SCSI disk (sd) driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR); 
MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR); MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR); MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); MODULE_ALIAS_SCSI_DEVICE(TYPE_ZBC); #define SD_MINORS 16 static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned int mode); static void sd_config_write_same(struct scsi_disk *sdkp, struct queue_limits *lim); static int sd_revalidate_disk(struct gendisk *); static void sd_unlock_native_capacity(struct gendisk *disk); static void sd_shutdown(struct device *); static void scsi_disk_release(struct device *cdev); static DEFINE_IDA(sd_index_ida); static mempool_t *sd_page_pool; static struct lock_class_key sd_bio_compl_lkclass; static const char *sd_cache_types[] = { "write through", "none", "write back", "write back, no read (daft)" }; static void sd_set_flush_flag(struct scsi_disk *sdkp, struct queue_limits *lim) { if (sdkp->WCE) { lim->features |= BLK_FEAT_WRITE_CACHE; if (sdkp->DPOFUA) lim->features |= BLK_FEAT_FUA; else lim->features &= ~BLK_FEAT_FUA; } else { lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA); } } static ssize_t cache_type_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ct, rcd, wce, sp; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; char buffer[64]; char *buffer_data; struct scsi_mode_data data; struct scsi_sense_hdr sshdr; static const char temp[] = "temporary 
"; int len, ret; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) /* no cache control on RBC devices; theoretically they * can do it, but there's probably so many exceptions * it's not worth the risk */ return -EINVAL; if (strncmp(buf, temp, sizeof(temp) - 1) == 0) { buf += sizeof(temp) - 1; sdkp->cache_override = 1; } else { sdkp->cache_override = 0; } ct = sysfs_match_string(sd_cache_types, buf); if (ct < 0) return -EINVAL; rcd = ct & 0x01 ? 1 : 0; wce = (ct & 0x02) && !sdkp->write_prot ? 1 : 0; if (sdkp->cache_override) { struct queue_limits lim; sdkp->WCE = wce; sdkp->RCD = rcd; lim = queue_limits_start_update(sdkp->disk->queue); sd_set_flush_flag(sdkp, &lim); ret = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (ret) return ret; return count; } if (scsi_mode_sense(sdp, 0x08, 8, 0, buffer, sizeof(buffer), SD_TIMEOUT, sdkp->max_retries, &data, NULL)) return -EINVAL; len = min_t(size_t, sizeof(buffer), data.length - data.header_length - data.block_descriptor_length); buffer_data = buffer + data.header_length + data.block_descriptor_length; buffer_data[2] &= ~0x05; buffer_data[2] |= wce << 2 | rcd; sp = buffer_data[0] & 0x80 ? 1 : 0; buffer_data[0] &= ~0x80; /* * Ensure WP, DPOFUA, and RESERVED fields are cleared in * received mode parameter buffer before doing MODE SELECT. 
*/ data.device_specific = 0; ret = scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); if (ret) { if (ret > 0 && scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } sd_revalidate_disk(sdkp->disk); return count; } static ssize_t manage_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop && sdp->manage_runtime_start_stop && sdp->manage_shutdown); } static DEVICE_ATTR_RO(manage_start_stop); static ssize_t manage_system_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop); } static ssize_t manage_system_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_system_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_system_start_stop); static ssize_t manage_runtime_start_stop_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop); } static ssize_t manage_runtime_start_stop_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_runtime_start_stop = v; return count; } static DEVICE_ATTR_RW(manage_runtime_start_stop); static ssize_t 
manage_shutdown_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; return sysfs_emit(buf, "%u\n", sdp->manage_shutdown); } static ssize_t manage_shutdown_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; bool v; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (kstrtobool(buf, &v)) return -EINVAL; sdp->manage_shutdown = v; return count; } static DEVICE_ATTR_RW(manage_shutdown); static ssize_t allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->device->allow_restart); } static ssize_t allow_restart_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool v; struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; if (kstrtobool(buf, &v)) return -EINVAL; sdp->allow_restart = v; return count; } static DEVICE_ATTR_RW(allow_restart); static ssize_t cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); int ct = sdkp->RCD + 2*sdkp->WCE; return sprintf(buf, "%s\n", sd_cache_types[ct]); } static DEVICE_ATTR_RW(cache_type); static ssize_t FUA_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->DPOFUA); } static DEVICE_ATTR_RO(FUA); static ssize_t protection_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->protection_type); } static ssize_t protection_type_store(struct device *dev, struct device_attribute 
*attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); unsigned int val; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; err = kstrtouint(buf, 10, &val); if (err) return err; if (val <= T10_PI_TYPE3_PROTECTION) sdkp->protection_type = val; return count; } static DEVICE_ATTR_RW(protection_type); static ssize_t protection_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; unsigned int dif, dix; dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); dix = scsi_host_dix_capable(sdp->host, sdkp->protection_type); if (!dix && scsi_host_dix_capable(sdp->host, T10_PI_TYPE0_PROTECTION)) { dif = 0; dix = 1; } if (!dif && !dix) return sprintf(buf, "none\n"); return sprintf(buf, "%s%u\n", dix ? "dix" : "dif", dif); } static DEVICE_ATTR_RO(protection_mode); static ssize_t app_tag_own_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->ATO); } static DEVICE_ATTR_RO(app_tag_own); static ssize_t thin_provisioning_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->lbpme); } static DEVICE_ATTR_RO(thin_provisioning); /* sysfs_match_string() requires dense arrays */ static const char *lbp_mode[] = { [SD_LBP_FULL] = "full", [SD_LBP_UNMAP] = "unmap", [SD_LBP_WS16] = "writesame_16", [SD_LBP_WS10] = "writesame_10", [SD_LBP_ZERO] = "writesame_zero", [SD_LBP_DISABLE] = "disabled", }; static ssize_t provisioning_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%s\n", lbp_mode[sdkp->provisioning_mode]); } static ssize_t provisioning_mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); 
struct scsi_device *sdp = sdkp->device;
	struct queue_limits lim;
	int mode, err;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (sdp->type != TYPE_DISK)
		return -EINVAL;

	mode = sysfs_match_string(lbp_mode, buf);
	if (mode < 0)
		return -EINVAL;

	/* Recompute the discard limits for the new mode and commit them. */
	lim = queue_limits_start_update(sdkp->disk->queue);
	sd_config_discard(sdkp, &lim, mode);
	err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim);
	if (err)
		return err;
	return count;
}
static DEVICE_ATTR_RW(provisioning_mode);

/*
 * Names of the zeroing modes, indexed by the SD_ZERO_* value.
 * sysfs_match_string() requires dense arrays
 */
static const char *zeroing_mode[] = {
	[SD_ZERO_WRITE]		= "write",
	[SD_ZERO_WS]		= "writesame",
	[SD_ZERO_WS16_UNMAP]	= "writesame_16_unmap",
	[SD_ZERO_WS10_UNMAP]	= "writesame_10_unmap",
};

/* sysfs "zeroing_mode" show handler: prints the zeroing_mode[] name. */
static ssize_t
zeroing_mode_show(struct device *dev, struct device_attribute *attr,
		  char *buf)
{
	struct scsi_disk *sdkp = to_scsi_disk(dev);

	return sprintf(buf, "%s\n", zeroing_mode[sdkp->zeroing_mode]);
}

/*
 * sysfs "zeroing_mode" store handler: requires CAP_SYS_ADMIN, matches the
 * written string against zeroing_mode[] and stores the index in
 * sdkp->zeroing_mode. Returns -EINVAL when nothing matches.
 */
static ssize_t
zeroing_mode_store(struct device *dev, struct device_attribute *attr,
		   const char *buf, size_t count)
{
	struct scsi_disk *sdkp = to_scsi_disk(dev);
	int mode;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	mode = sysfs_match_string(zeroing_mode, buf);
	if (mode < 0)
		return -EINVAL;

	sdkp->zeroing_mode = mode;

	return count;
}
static DEVICE_ATTR_RW(zeroing_mode);

/* sysfs "max_medium_access_timeouts" show handler. */
static ssize_t
max_medium_access_timeouts_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct scsi_disk *sdkp = to_scsi_disk(dev);

	return sprintf(buf, "%u\n", sdkp->max_medium_access_timeouts);
}

/*
 * sysfs "max_medium_access_timeouts" store handler: requires CAP_SYS_ADMIN
 * and parses the decimal value directly into the scsi_disk field.
 */
static ssize_t
max_medium_access_timeouts_store(struct device *dev,
				 struct device_attribute *attr, const char *buf,
				 size_t count)
{
	struct scsi_disk *sdkp = to_scsi_disk(dev);
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	err = kstrtouint(buf, 10, &sdkp->max_medium_access_timeouts);

	return err ?
err : count; } static DEVICE_ATTR_RW(max_medium_access_timeouts); static ssize_t max_write_same_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); return sprintf(buf, "%u\n", sdkp->max_ws_blocks); } static ssize_t max_write_same_blocks_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdp = sdkp->device; struct queue_limits lim; unsigned long max; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC) return -EINVAL; err = kstrtoul(buf, 10, &max); if (err) return err; if (max == 0) sdp->no_write_same = 1; else if (max <= SD_MAX_WS16_BLOCKS) { sdp->no_write_same = 0; sdkp->max_ws_blocks = max; } lim = queue_limits_start_update(sdkp->disk->queue); sd_config_write_same(sdkp, &lim); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; return count; } static DEVICE_ATTR_RW(max_write_same_blocks); static ssize_t zoned_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_disk *sdkp = to_scsi_disk(dev); if (sdkp->device->type == TYPE_ZBC) return sprintf(buf, "host-managed\n"); if (sdkp->zoned == 1) return sprintf(buf, "host-aware\n"); if (sdkp->zoned == 2) return sprintf(buf, "drive-managed\n"); return sprintf(buf, "none\n"); } static DEVICE_ATTR_RO(zoned_cap); static ssize_t max_retries_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct scsi_device *sdev = sdkp->device; int retries, err; err = kstrtoint(buf, 10, &retries); if (err) return err; if (retries == SCSI_CMD_RETRIES_NO_LIMIT || retries <= SD_MAX_RETRIES) { sdkp->max_retries = retries; return count; } sdev_printk(KERN_ERR, sdev, "max_retries must be between -1 and %d\n", SD_MAX_RETRIES); return -EINVAL; } static ssize_t 
max_retries_show(struct device *dev, struct device_attribute *attr,
		 char *buf)
{
	struct scsi_disk *sdkp = to_scsi_disk(dev);

	return sprintf(buf, "%d\n", sdkp->max_retries);
}

static DEVICE_ATTR_RW(max_retries);

/* Attributes exposed for every device of sd_disk_class. */
static struct attribute *sd_disk_attrs[] = {
	&dev_attr_cache_type.attr,
	&dev_attr_FUA.attr,
	&dev_attr_allow_restart.attr,
	&dev_attr_manage_start_stop.attr,
	&dev_attr_manage_system_start_stop.attr,
	&dev_attr_manage_runtime_start_stop.attr,
	&dev_attr_manage_shutdown.attr,
	&dev_attr_protection_type.attr,
	&dev_attr_protection_mode.attr,
	&dev_attr_app_tag_own.attr,
	&dev_attr_thin_provisioning.attr,
	&dev_attr_provisioning_mode.attr,
	&dev_attr_zeroing_mode.attr,
	&dev_attr_max_write_same_blocks.attr,
	&dev_attr_max_medium_access_timeouts.attr,
	&dev_attr_zoned_cap.attr,
	&dev_attr_max_retries.attr,
	NULL,
};
ATTRIBUTE_GROUPS(sd_disk);

/* The "scsi_disk" device class; its devices carry the sd_disk attribute group. */
static struct class sd_disk_class = {
	.name		= "scsi_disk",
	.dev_release	= scsi_disk_release,
	.dev_groups	= sd_disk_groups,
};

/*
 * Don't request a new module, as that could deadlock in multipath
 * environment.
 */
static void sd_default_probe(dev_t devt)
{
}

/*
 * Device no to disk mapping:
 *
 *       major         disc2     disc  p1
 *   |............|.............|....|....| <- dev_t
 *    31        20 19          8 7  4 3  0
 *
 * Inside a major, we have 16k disks, however mapped non-
 * contiguously. The first 16 disks are for major0, the next
 * ones with major1, ... Disk 256 is for major0 again, disk 272
 * for major1, ...
 * As we stay compatible with our numbering scheme, we can reuse
 * the well-known SCSI majors 8, 65--71, 136--143.
 */
static int sd_major(int major_idx)
{
	switch (major_idx) {
	case 0:
		return SCSI_DISK0_MAJOR;
	case 1 ... 7:
		return SCSI_DISK1_MAJOR + major_idx - 1;
	case 8 ...
15:
		return SCSI_DISK8_MAJOR + major_idx - 8;
	default:
		BUG();
		return 0;	/* shut up gcc */
	}
}

#ifdef CONFIG_BLK_SED_OPAL
/*
 * Send or receive a security protocol payload for the disk passed in @data.
 *
 * Builds a 12-byte SECURITY PROTOCOL OUT (@send true) or IN CDB carrying
 * @secp, @spsp and @len, and executes it with @buffer as the data buffer.
 * Returns the scsi_execute_cmd() result when it is zero or negative, and
 * -EIO when it is positive.
 */
static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
		size_t len, bool send)
{
	struct scsi_disk *sdkp = data;
	struct scsi_device *sdev = sdkp->device;
	u8 cdb[12] = { 0, };
	const struct scsi_exec_args exec_args = {
		.req_flags = BLK_MQ_REQ_PM,
	};
	int ret;

	cdb[0] = send ? SECURITY_PROTOCOL_OUT : SECURITY_PROTOCOL_IN;
	cdb[1] = secp;
	put_unaligned_be16(spsp, &cdb[2]);
	put_unaligned_be32(len, &cdb[6]);

	ret = scsi_execute_cmd(sdev, cdb, send ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
			       buffer, len, SD_TIMEOUT, sdkp->max_retries,
			       &exec_args);
	return ret <= 0 ? ret : -EIO;
}
#endif /* CONFIG_BLK_SED_OPAL */

/*
 * Look up the DIX operation based on whether the command is read or
 * write and whether dix and dif are enabled.
 */
static unsigned int sd_prot_op(bool write, bool dix, bool dif)
{
	/* Lookup table: bit 2 (write), bit 1 (dix), bit 0 (dif) */
	static const unsigned int ops[] = {	/* wrt dix dif */
		SCSI_PROT_NORMAL,		/*  0   0   0  */
		SCSI_PROT_READ_STRIP,		/*  0   0   1  */
		SCSI_PROT_READ_INSERT,		/*  0   1   0  */
		SCSI_PROT_READ_PASS,		/*  0   1   1  */
		SCSI_PROT_NORMAL,		/*  1   0   0  */
		SCSI_PROT_WRITE_INSERT,		/*  1   0   1  */
		SCSI_PROT_WRITE_STRIP,		/*  1   1   0  */
		SCSI_PROT_WRITE_PASS,		/*  1   1   1  */
	};

	return ops[write << 2 | dix << 1 | dif];
}

/*
 * Returns a mask of the protection flags that are valid for a given DIX
 * operation.
*/
static unsigned int sd_prot_flag_mask(unsigned int prot_op)
{
	/* Indexed by SCSI_PROT_* operation; unlisted operations mask to 0. */
	static const unsigned int flag_mask[] = {
		[SCSI_PROT_NORMAL]		= 0,

		[SCSI_PROT_READ_STRIP]		= SCSI_PROT_TRANSFER_PI |
						  SCSI_PROT_GUARD_CHECK |
						  SCSI_PROT_REF_CHECK |
						  SCSI_PROT_REF_INCREMENT,

		[SCSI_PROT_READ_INSERT]		= SCSI_PROT_REF_INCREMENT |
						  SCSI_PROT_IP_CHECKSUM,

		[SCSI_PROT_READ_PASS]		= SCSI_PROT_TRANSFER_PI |
						  SCSI_PROT_GUARD_CHECK |
						  SCSI_PROT_REF_CHECK |
						  SCSI_PROT_REF_INCREMENT |
						  SCSI_PROT_IP_CHECKSUM,

		[SCSI_PROT_WRITE_INSERT]	= SCSI_PROT_TRANSFER_PI |
						  SCSI_PROT_REF_INCREMENT,

		[SCSI_PROT_WRITE_STRIP]		= SCSI_PROT_GUARD_CHECK |
						  SCSI_PROT_REF_CHECK |
						  SCSI_PROT_REF_INCREMENT |
						  SCSI_PROT_IP_CHECKSUM,

		[SCSI_PROT_WRITE_PASS]		= SCSI_PROT_TRANSFER_PI |
						  SCSI_PROT_GUARD_CHECK |
						  SCSI_PROT_REF_CHECK |
						  SCSI_PROT_REF_INCREMENT |
						  SCSI_PROT_IP_CHECKSUM,
	};

	return flag_mask[prot_op];
}

/*
 * Set up the protection state of @scmd for the given @dix and @dif values.
 *
 * Accumulates SCSI_PROT_* flags from the bio integrity flags, records the
 * protection operation and type on the command, and finally restricts the
 * flags to those valid for the selected operation.
 *
 * Returns the protect value chosen below: 0 when @dif is zero, otherwise
 * 1 << 5 (target PI checking enabled) or 3 << 5 (disabled).
 */
static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd,
					   unsigned int dix, unsigned int dif)
{
	struct request *rq = scsi_cmd_to_rq(scmd);
	struct bio *bio = rq->bio;
	unsigned int prot_op = sd_prot_op(rq_data_dir(rq), dix, dif);
	unsigned int protect = 0;

	if (dix) {				/* DIX Type 0, 1, 2, 3 */
		if (bio_integrity_flagged(bio, BIP_IP_CHECKSUM))
			scmd->prot_flags |= SCSI_PROT_IP_CHECKSUM;

		if (bio_integrity_flagged(bio, BIP_CHECK_GUARD))
			scmd->prot_flags |= SCSI_PROT_GUARD_CHECK;
	}

	if (dif != T10_PI_TYPE3_PROTECTION) {	/* DIX/DIF Type 0, 1, 2 */
		scmd->prot_flags |= SCSI_PROT_REF_INCREMENT;

		if (bio_integrity_flagged(bio, BIP_CHECK_REFTAG))
			scmd->prot_flags |= SCSI_PROT_REF_CHECK;
	}

	if (dif) {				/* DIX/DIF Type 1, 2, 3 */
		scmd->prot_flags |= SCSI_PROT_TRANSFER_PI;

		if (bio_integrity_flagged(bio, BIP_DISK_NOCHECK))
			protect = 3 << 5;	/* Disable target PI checking */
		else
			protect = 1 << 5;	/* Enable target PI checking */
	}

	scsi_set_prot_op(scmd, prot_op);
	scsi_set_prot_type(scmd, dif);
	scmd->prot_flags &= sd_prot_flag_mask(prot_op);

	return protect;
}

/* Switch the disk to SD_LBP_DISABLE and disable discard on its queue. */
static void sd_disable_discard(struct scsi_disk *sdkp)
{
	sdkp->provisioning_mode = SD_LBP_DISABLE;
blk_queue_disable_discard(sdkp->disk->queue);
}

/*
 * Record @mode as the disk's provisioning mode and fill in the discard
 * fields of @lim (alignment, granularity and maximum sectors) for it.
 * SD_LBP_FULL and SD_LBP_DISABLE leave the maximum at zero.
 */
static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim,
		unsigned int mode)
{
	unsigned int logical_block_size = sdkp->device->sector_size;
	unsigned int max_blocks = 0;

	lim->discard_alignment = sdkp->unmap_alignment * logical_block_size;
	lim->discard_granularity = max(sdkp->physical_block_size,
			sdkp->unmap_granularity * logical_block_size);
	sdkp->provisioning_mode = mode;

	switch (mode) {

	case SD_LBP_FULL:
	case SD_LBP_DISABLE:
		break;

	case SD_LBP_UNMAP:
		max_blocks = min_not_zero(sdkp->max_unmap_blocks,
					  (u32)SD_MAX_WS16_BLOCKS);
		break;

	case SD_LBP_WS16:
		if (sdkp->device->unmap_limit_for_ws)
			max_blocks = sdkp->max_unmap_blocks;
		else
			max_blocks = sdkp->max_ws_blocks;

		max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS16_BLOCKS);
		break;

	case SD_LBP_WS10:
		if (sdkp->device->unmap_limit_for_ws)
			max_blocks = sdkp->max_unmap_blocks;
		else
			max_blocks = sdkp->max_ws_blocks;

		max_blocks = min_not_zero(max_blocks, (u32)SD_MAX_WS10_BLOCKS);
		break;

	case SD_LBP_ZERO:
		max_blocks = min_not_zero(sdkp->max_ws_blocks,
					  (u32)SD_MAX_WS10_BLOCKS);
		break;
	}

	/* Convert the limit from logical blocks to 512-byte sectors. */
	lim->max_hw_discard_sectors = max_blocks *
		(logical_block_size >> SECTOR_SHIFT);
}

/*
 * Attach a zeroed page from sd_page_pool to @rq as its special payload of
 * @data_len bytes and set RQF_SPECIAL_PAYLOAD.
 *
 * Returns the virtual address of the payload, or NULL if no page could be
 * allocated (the allocation uses GFP_ATOMIC).
 */
static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
{
	struct page *page;

	page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
	if (!page)
		return NULL;
	clear_highpage(page);
	bvec_set_page(&rq->special_vec, page, data_len, 0);
	rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
	return bvec_virt(&rq->special_vec);
}

/*
 * Build an UNMAP command for the request behind @cmd, using a 24-byte
 * special payload as the parameter list. Returns BLK_STS_RESOURCE when the
 * payload page cannot be allocated.
 */
static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
{
	struct scsi_device *sdp = cmd->device;
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
	unsigned int data_len = 24;
	char *buf;

	buf = sd_set_special_bvec(rq, data_len);
	if (!buf)
		return BLK_STS_RESOURCE;

	cmd->cmd_len = 10;
	cmd->cmnd[0] = UNMAP;
	cmd->cmnd[8] = 24;
put_unaligned_be16(6 + 16, &buf[0]);
	put_unaligned_be16(16, &buf[2]);
	/* LBA at offset 8 and block count at offset 16 of the payload. */
	put_unaligned_be64(lba, &buf[8]);
	put_unaligned_be32(nr_blocks, &buf[16]);

	cmd->allowed = sdkp->max_retries;
	cmd->transfersize = data_len;
	rq->timeout = SD_TIMEOUT;

	return scsi_alloc_sgtables(cmd);
}

/*
 * Fill in the atomic write limits of @lim from the values stored in @sdkp.
 *
 * Does nothing when the disk reports neither max_atomic nor
 * max_atomic_with_boundary, when it uses Type 2 protection, or when the
 * computed unit sizes do not agree with the granularity and alignment.
 */
static void sd_config_atomic(struct scsi_disk *sdkp, struct queue_limits *lim)
{
	unsigned int logical_block_size = sdkp->device->sector_size,
		physical_block_size_sectors, max_atomic, unit_min, unit_max;

	if ((!sdkp->max_atomic && !sdkp->max_atomic_with_boundary) ||
	    sdkp->protection_type == T10_PI_TYPE2_PROTECTION)
		return;

	physical_block_size_sectors = sdkp->physical_block_size /
					sdkp->device->sector_size;

	/* Fall back to the physical block size when no granularity is set. */
	unit_min = rounddown_pow_of_two(sdkp->atomic_granularity ?
					sdkp->atomic_granularity :
					physical_block_size_sectors);

	/*
	 * Only use atomic boundary when we have the odd scenario of
	 * sdkp->max_atomic == 0, which the spec does permit.
	 */
	if (sdkp->max_atomic) {
		max_atomic = sdkp->max_atomic;
		unit_max = rounddown_pow_of_two(sdkp->max_atomic);
		sdkp->use_atomic_write_boundary = 0;
	} else {
		max_atomic = sdkp->max_atomic_with_boundary;
		unit_max = rounddown_pow_of_two(sdkp->max_atomic_boundary);
		sdkp->use_atomic_write_boundary = 1;
	}

	/*
	 * Ensure compliance with granularity and alignment. For now, keep it
	 * simple and just don't support atomic writes for values mismatched
	 * with max_{boundary}atomic, physical block size, and
	 * atomic_granularity itself.
	 *
	 * We're really being distrustful by checking unit_max also...
*/
	if (sdkp->atomic_granularity > 1) {
		if (unit_min > 1 && unit_min % sdkp->atomic_granularity)
			return;
		if (unit_max > 1 && unit_max % sdkp->atomic_granularity)
			return;
	}

	if (sdkp->atomic_alignment > 1) {
		if (unit_min > 1 && unit_min % sdkp->atomic_alignment)
			return;
		if (unit_max > 1 && unit_max % sdkp->atomic_alignment)
			return;
	}

	/* The limits are reported in bytes; the values above are in blocks. */
	lim->atomic_write_hw_max = max_atomic * logical_block_size;
	lim->atomic_write_hw_boundary = 0;
	lim->atomic_write_hw_unit_min = unit_min * logical_block_size;
	lim->atomic_write_hw_unit_max = unit_max * logical_block_size;
	lim->features |= BLK_FEAT_ATOMIC_WRITES;
}

/*
 * Build a WRITE SAME(16) command for the request behind @cmd, with the
 * UNMAP bit set in byte 1 when @unmap is true. The data-out buffer is one
 * zeroed logical block attached as the request's special payload.
 * Returns BLK_STS_RESOURCE when that payload cannot be allocated.
 */
static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
		bool unmap)
{
	struct scsi_device *sdp = cmd->device;
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
	u32 data_len = sdp->sector_size;

	if (!sd_set_special_bvec(rq, data_len))
		return BLK_STS_RESOURCE;

	cmd->cmd_len = 16;
	cmd->cmnd[0] = WRITE_SAME_16;
	if (unmap)
		cmd->cmnd[1] = 0x8; /* UNMAP */
	put_unaligned_be64(lba, &cmd->cmnd[2]);
	put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);

	cmd->allowed = sdkp->max_retries;
	cmd->transfersize = data_len;
	/* Unmapping uses the regular timeout, a real write the longer one. */
	rq->timeout = unmap ?
SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;

	return scsi_alloc_sgtables(cmd);
}

/*
 * Build a WRITE SAME(10) command for the request behind @cmd, with the
 * UNMAP bit set in byte 1 when @unmap is true. The LBA is written as a
 * 32-bit and the block count as a 16-bit big-endian field.
 * Returns BLK_STS_RESOURCE when the payload page cannot be allocated.
 */
static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
		bool unmap)
{
	struct scsi_device *sdp = cmd->device;
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
	u32 data_len = sdp->sector_size;

	if (!sd_set_special_bvec(rq, data_len))
		return BLK_STS_RESOURCE;

	cmd->cmd_len = 10;
	cmd->cmnd[0] = WRITE_SAME;
	if (unmap)
		cmd->cmnd[1] = 0x8; /* UNMAP */
	put_unaligned_be32(lba, &cmd->cmnd[2]);
	put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);

	cmd->allowed = sdkp->max_retries;
	cmd->transfersize = data_len;
	rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;

	return scsi_alloc_sgtables(cmd);
}

/*
 * Build the command for a write-zeroes request.
 *
 * Unless the request carries REQ_NOUNMAP, the UNMAP variants of WRITE SAME
 * are used when the zeroing mode asks for them. Otherwise a plain WRITE SAME
 * is issued, picking the 16-byte form when the disk prefers it or when the
 * LBA or block count does not fit the 10-byte CDB. Devices flagged
 * no_write_same get BLK_STS_TARGET with the request marked quiet.
 */
static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_device *sdp = cmd->device;
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	u64 lba = sectors_to_logical(sdp, blk_rq_pos(rq));
	u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));

	if (!(rq->cmd_flags & REQ_NOUNMAP)) {
		switch (sdkp->zeroing_mode) {
		case SD_ZERO_WS16_UNMAP:
			return sd_setup_write_same16_cmnd(cmd, true);
		case SD_ZERO_WS10_UNMAP:
			return sd_setup_write_same10_cmnd(cmd, true);
		}
	}

	if (sdp->no_write_same) {
		rq->rq_flags |= RQF_QUIET;
		return BLK_STS_TARGET;
	}

	if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff)
		return sd_setup_write_same16_cmnd(cmd, false);

	return sd_setup_write_same10_cmnd(cmd, false);
}

/*
 * Mark the device as not supporting WRITE SAME, clear the block limit and
 * disable write-zeroes on the disk's queue.
 */
static void sd_disable_write_same(struct scsi_disk *sdkp)
{
	sdkp->device->no_write_same = 1;
	sdkp->max_ws_blocks = 0;
	blk_queue_disable_write_zeroes(sdkp->disk->queue);
}

/*
 * Settle sdkp->max_ws_blocks and sdkp->zeroing_mode from the device's
 * reported capabilities and store the resulting write-zeroes limit in @lim.
 */
static void sd_config_write_same(struct scsi_disk *sdkp,
		struct queue_limits *lim)
{
	unsigned int logical_block_size = sdkp->device->sector_size;

	if (sdkp->device->no_write_same) {
		sdkp->max_ws_blocks = 0;
		goto out;
	}

	/* Some devices can not
handle block counts above 0xffff despite
	 * supporting WRITE SAME(16). Consequently we default to 64k
	 * blocks per I/O unless the device explicitly advertises a
	 * bigger limit.
	 */
	if (sdkp->max_ws_blocks > SD_MAX_WS10_BLOCKS)
		sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks,
						   (u32)SD_MAX_WS16_BLOCKS);
	else if (sdkp->ws16 || sdkp->ws10 || sdkp->device->no_report_opcodes)
		sdkp->max_ws_blocks = min_not_zero(sdkp->max_ws_blocks,
						   (u32)SD_MAX_WS10_BLOCKS);
	else {
		/* No sign of WRITE SAME support: turn it off entirely. */
		sdkp->device->no_write_same = 1;
		sdkp->max_ws_blocks = 0;
	}

	/*
	 * Pick the zeroing mode: the UNMAP flavours of WRITE SAME when lbprz
	 * is set together with lbpws or lbpws10, plain WRITE SAME when a
	 * block limit remains, and regular writes otherwise.
	 */
	if (sdkp->lbprz && sdkp->lbpws)
		sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
	else if (sdkp->lbprz && sdkp->lbpws10)
		sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
	else if (sdkp->max_ws_blocks)
		sdkp->zeroing_mode = SD_ZERO_WS;
	else
		sdkp->zeroing_mode = SD_ZERO_WRITE;

	if (sdkp->max_ws_blocks &&
	    sdkp->physical_block_size > logical_block_size) {
		/*
		 * Reporting a maximum number of blocks that is not aligned
		 * on the device physical size would cause a large write same
		 * request to be split into physically unaligned chunks by
		 * __blkdev_issue_write_zeroes() even if the caller of this
		 * functions took care to align the large request. So make sure
		 * the maximum reported is aligned to the device physical block
		 * size. This is only an optional optimization for regular
		 * disks, but this is mandatory to avoid failure of large write
		 * same requests directed at sequential write required zones of
		 * host-managed ZBC disks.
*/
		sdkp->max_ws_blocks =
			round_down(sdkp->max_ws_blocks,
				   bytes_to_logical(sdkp->device,
						    sdkp->physical_block_size));
	}

out:
	/* Convert the limit from logical blocks to 512-byte sectors. */
	lim->max_write_zeroes_sectors =
		sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT);
}

/*
 * Build a SYNCHRONIZE CACHE command (16-byte form when the device has
 * use_16_for_sync set, 10-byte form otherwise) with no data transfer and a
 * timeout of the queue timeout times SD_FLUSH_TIMEOUT_MULTIPLIER.
 */
static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);

	/* flush requests don't perform I/O, zero the S/G table */
	memset(&cmd->sdb, 0, sizeof(cmd->sdb));

	if (cmd->device->use_16_for_sync) {
		cmd->cmnd[0] = SYNCHRONIZE_CACHE_16;
		cmd->cmd_len = 16;
	} else {
		cmd->cmnd[0] = SYNCHRONIZE_CACHE;
		cmd->cmd_len = 10;
	}
	cmd->transfersize = 0;
	cmd->allowed = sdkp->max_retries;

	rq->timeout = rq->q->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER;
	return BLK_STS_OK;
}

/**
 * sd_group_number() - Compute the GROUP NUMBER field
 * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER
 *	field.
 *
 * From SBC-5 r05 (https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf):
 * 0: no relative lifetime.
 * 1: shortest relative lifetime.
 * 2: second shortest relative lifetime.
 * 3 - 0x3d: intermediate relative lifetimes.
 * 0x3e: second longest relative lifetime.
 * 0x3f: longest relative lifetime.
 *
 * Return: 0 when sdkp->rscs is not set; otherwise the smallest of the bio's
 * write hint, the disk's permanent stream count and 0x3f.
 */
static u8 sd_group_number(struct scsi_cmnd *cmd)
{
	const struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);

	if (!sdkp->rscs)
		return 0;

	return min3((u32)rq->bio->bi_write_hint,
		    (u32)sdkp->permanent_stream_count, 0x3fu);
}

/*
 * Fill in a variable-length READ(32) or WRITE(32) CDB. @flags goes into
 * byte 10 and the low three bits of @dld into byte 11.
 */
static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
				       sector_t lba, unsigned int nr_blocks,
				       unsigned char flags, unsigned int dld)
{
	cmd->cmd_len = SD_EXT_CDB_SIZE;
	cmd->cmnd[0]  = VARIABLE_LENGTH_CMD;
	cmd->cmnd[6]  = sd_group_number(cmd);
	cmd->cmnd[7]  = 0x18; /* Additional CDB len */
	cmd->cmnd[9]  = write ?
WRITE_32 : READ_32;
	cmd->cmnd[10] = flags;
	cmd->cmnd[11] = dld & 0x07;
	put_unaligned_be64(lba, &cmd->cmnd[12]);
	put_unaligned_be32(lba, &cmd->cmnd[20]); /* Expected Indirect LBA */
	put_unaligned_be32(nr_blocks, &cmd->cmnd[28]);

	return BLK_STS_OK;
}

/*
 * Fill in a READ(16) or WRITE(16) CDB. The three bits of @dld are split:
 * bit 2 is OR-ed into byte 1 next to @flags, bits 1..0 land in the top two
 * bits of byte 14 above the group number.
 */
static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write,
				       sector_t lba, unsigned int nr_blocks,
				       unsigned char flags, unsigned int dld)
{
	cmd->cmd_len  = 16;
	cmd->cmnd[0]  = write ? WRITE_16 : READ_16;
	cmd->cmnd[1]  = flags | ((dld >> 2) & 0x01);
	cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd);
	cmd->cmnd[15] = 0;
	put_unaligned_be64(lba, &cmd->cmnd[2]);
	put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);

	return BLK_STS_OK;
}

/*
 * Fill in a READ(10) or WRITE(10) CDB: 32-bit LBA at byte 2, 16-bit block
 * count at byte 7, group number in byte 6.
 */
static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write,
				       sector_t lba, unsigned int nr_blocks,
				       unsigned char flags)
{
	cmd->cmd_len = 10;
	cmd->cmnd[0] = write ? WRITE_10 : READ_10;
	cmd->cmnd[1] = flags;
	cmd->cmnd[6] = sd_group_number(cmd);
	cmd->cmnd[9] = 0;
	put_unaligned_be32(lba, &cmd->cmnd[2]);
	put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);

	return BLK_STS_OK;
}

/*
 * Fill in a READ(6) or WRITE(6) CDB: the low 21 bits of @lba are spread
 * over bytes 1..3 and the block count goes into byte 4. Returns
 * BLK_STS_IOERR for a zero block count or when the 0x8 (FUA) bit is set in
 * @flags.
 */
static blk_status_t sd_setup_rw6_cmnd(struct scsi_cmnd *cmd, bool write,
				      sector_t lba, unsigned int nr_blocks,
				      unsigned char flags)
{
	/* Avoid that 0 blocks gets translated into 256 blocks. */
	if (WARN_ON_ONCE(nr_blocks == 0))
		return BLK_STS_IOERR;

	if (unlikely(flags & 0x8)) {
		/*
		 * This happens only if this drive failed 10byte rw
		 * command with ILLEGAL_REQUEST during operation and
		 * thus turned off use_10_for_rw.
		 */
		scmd_printk(KERN_ERR, cmd, "FUA write on READ/WRITE(6) drive\n");
		return BLK_STS_IOERR;
	}

	cmd->cmd_len = 6;
	cmd->cmnd[0] = write ? WRITE_6 : READ_6;
	cmd->cmnd[1] = (lba >> 16) & 0x1f;
	cmd->cmnd[2] = (lba >> 8) & 0xff;
	cmd->cmnd[3] = lba & 0xff;
	cmd->cmnd[4] = nr_blocks;
	cmd->cmnd[5] = 0;

	return BLK_STS_OK;
}

/*
 * Check if a command has a duration limit set. If it does, and the target
 * device supports CDL and the feature is enabled, return the limit
 * descriptor index to use.
Return 0 (no limit) otherwise. */ static int sd_cdl_dld(struct scsi_disk *sdkp, struct scsi_cmnd *scmd) { struct scsi_device *sdp = sdkp->device; int hint; if (!sdp->cdl_supported || !sdp->cdl_enable) return 0; /* * Use "no limit" if the request ioprio does not specify a duration * limit hint. */ hint = IOPRIO_PRIO_HINT(req_get_ioprio(scsi_cmd_to_rq(scmd))); if (hint < IOPRIO_HINT_DEV_DURATION_LIMIT_1 || hint > IOPRIO_HINT_DEV_DURATION_LIMIT_7) return 0; return (hint - IOPRIO_HINT_DEV_DURATION_LIMIT_1) + 1; } static blk_status_t sd_setup_atomic_cmnd(struct scsi_cmnd *cmd, sector_t lba, unsigned int nr_blocks, bool boundary, unsigned char flags) { cmd->cmd_len = 16; cmd->cmnd[0] = WRITE_ATOMIC_16; cmd->cmnd[1] = flags; put_unaligned_be64(lba, &cmd->cmnd[2]); put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); if (boundary) put_unaligned_be16(nr_blocks, &cmd->cmnd[10]); else put_unaligned_be16(0, &cmd->cmnd[10]); put_unaligned_be16(nr_blocks, &cmd->cmnd[12]); cmd->cmnd[14] = 0; cmd->cmnd[15] = 0; return BLK_STS_OK; } static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_device *sdp = cmd->device; struct scsi_disk *sdkp = scsi_disk(rq->q->disk); sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; unsigned int dld; blk_status_t ret; unsigned int dif; bool dix; ret = scsi_alloc_sgtables(cmd); if (ret != BLK_STS_OK) return ret; ret = BLK_STS_IOERR; if (!scsi_device_online(sdp) || sdp->changed) { scmd_printk(KERN_ERR, cmd, "device offline or changed\n"); goto fail; } if (blk_rq_pos(rq) + blk_rq_sectors(rq) > get_capacity(rq->q->disk)) { scmd_printk(KERN_ERR, cmd, "access beyond end of device\n"); goto fail; } if ((blk_rq_pos(rq) & mask) || (blk_rq_sectors(rq) & mask)) { scmd_printk(KERN_ERR, cmd, 
"request not aligned to the logical block size\n"); goto fail; } /* * Some SD card readers can't handle accesses which touch the * last one or two logical blocks. Split accesses as needed. */ threshold = sdkp->capacity - SD_LAST_BUGGY_SECTORS; if (unlikely(sdp->last_sector_bug && lba + nr_blocks > threshold)) { if (lba < threshold) { /* Access up to the threshold but not beyond */ nr_blocks = threshold - lba; } else { /* Access only a single logical block */ nr_blocks = 1; } } fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0; dix = scsi_prot_sg_count(cmd); dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); dld = sd_cdl_dld(sdkp, cmd); if (dif || dix) protect = sd_setup_protect_cmnd(cmd, dix, dif); else protect = 0; if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { ret = sd_setup_rw32_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if (rq->cmd_flags & REQ_ATOMIC) { ret = sd_setup_atomic_cmnd(cmd, lba, nr_blocks, sdkp->use_atomic_write_boundary, protect | fua); } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks, protect | fua, dld); } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw || protect || rq->bio->bi_write_hint) { ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks, protect | fua); } else { ret = sd_setup_rw6_cmnd(cmd, write, lba, nr_blocks, protect | fua); } if (unlikely(ret != BLK_STS_OK)) goto fail; /* * We shouldn't disconnect in the middle of a sector, so with a dumb * host adapter, it's safe to assume that we can at least transfer * this many bytes between each connect / disconnect. 
*/
	cmd->transfersize = sdp->sector_size;
	cmd->underflow = nr_blocks << 9;
	cmd->allowed = sdkp->max_retries;
	cmd->sdb.length = nr_blocks * sdp->sector_size;

	SCSI_LOG_HLQUEUE(1,
			 scmd_printk(KERN_INFO, cmd,
				     "%s: block=%llu, count=%d\n", __func__,
				     (unsigned long long)blk_rq_pos(rq),
				     blk_rq_sectors(rq)));
	SCSI_LOG_HLQUEUE(2,
			 scmd_printk(KERN_INFO, cmd,
				     "%s %d/%u 512 byte blocks.\n",
				     write ? "writing" : "reading", nr_blocks,
				     blk_rq_sectors(rq)));

	/*
	 * This indicates that the command is ready from our end to be queued.
	 */
	return BLK_STS_OK;
fail:
	scsi_free_sgtables(cmd);
	return ret;
}

/*
 * Dispatch on the request operation and build the matching SCSI command.
 *
 * Discards are translated according to the disk's provisioning mode and
 * fail with BLK_STS_TARGET when the mode has no discard command. Unknown
 * operations trigger a one-time warning and return BLK_STS_NOTSUPP.
 */
static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
{
	struct request *rq = scsi_cmd_to_rq(cmd);

	switch (req_op(rq)) {
	case REQ_OP_DISCARD:
		switch (scsi_disk(rq->q->disk)->provisioning_mode) {
		case SD_LBP_UNMAP:
			return sd_setup_unmap_cmnd(cmd);
		case SD_LBP_WS16:
			return sd_setup_write_same16_cmnd(cmd, true);
		case SD_LBP_WS10:
			return sd_setup_write_same10_cmnd(cmd, true);
		case SD_LBP_ZERO:
			return sd_setup_write_same10_cmnd(cmd, false);
		default:
			return BLK_STS_TARGET;
		}
	case REQ_OP_WRITE_ZEROES:
		return sd_setup_write_zeroes_cmnd(cmd);
	case REQ_OP_FLUSH:
		return sd_setup_flush_cmnd(cmd);
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		return sd_setup_read_write_cmnd(cmd);
	case REQ_OP_ZONE_RESET:
		return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
						   false);
	case REQ_OP_ZONE_RESET_ALL:
		return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
						   true);
	case REQ_OP_ZONE_OPEN:
		return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false);
	case REQ_OP_ZONE_CLOSE:
		return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false);
	case REQ_OP_ZONE_FINISH:
		return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false);
	default:
		WARN_ON_ONCE(1);
		return BLK_STS_NOTSUPP;
	}
}

/*
 * Release per-command resources: a request flagged RQF_SPECIAL_PAYLOAD
 * returns its payload page to sd_page_pool.
 */
static void sd_uninit_command(struct scsi_cmnd *SCpnt)
{
	struct request *rq = scsi_cmd_to_rq(SCpnt);

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		mempool_free(rq->special_vec.bv_page, sd_page_pool);
}

static bool sd_need_revalidate(struct
gendisk *disk, struct scsi_disk *sdkp)
{
	/* Media change is only checked for removable or write-protected disks. */
	if (sdkp->device->removable || sdkp->write_prot) {
		if (disk_check_media_change(disk))
			return true;
	}

	/*
	 * Force a full rescan after ioctl(BLKRRPART).  While the disk state has
	 * nothing to do with partitions, BLKRRPART is used to force a full
	 * revalidate after things like a format for historical reasons.
	 */
	return test_bit(GD_NEED_PART_SCAN, &disk->state);
}

/**
 *	sd_open - open a scsi disk device
 *	@disk: disk to open
 *	@mode: open mode
 *
 *	Returns 0 if successful. Returns a negated errno value in case
 *	of error: -ENXIO when no reference to the device can be taken or
 *	the device is offline, -ENOMEDIUM when a removable drive is empty
 *	and BLK_OPEN_NDELAY was not requested, -EROFS when a write open
 *	hits a write-protected disk.
 *
 *	Note: This can be called from a user context (e.g. fsck(1) )
 *	or from within the kernel (e.g. as a result of a mount(1) ).
 *
 *	Locking: called with disk->open_mutex held.
 **/
static int sd_open(struct gendisk *disk, blk_mode_t mode)
{
	struct scsi_disk *sdkp = scsi_disk(disk);
	struct scsi_device *sdev = sdkp->device;
	int retval;

	if (scsi_device_get(sdev))
		return -ENXIO;

	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_open\n"));

	/*
	 * If the device is in error recovery, wait until it is done.
	 * If the device is offline, then disallow any access to it.
	 */
	retval = -ENXIO;
	if (!scsi_block_when_processing_errors(sdev))
		goto error_out;

	if (sd_need_revalidate(disk, sdkp))
		sd_revalidate_disk(disk);

	/*
	 * If the drive is empty, just let the open fail.
	 */
	retval = -ENOMEDIUM;
	if (sdev->removable && !sdkp->media_present &&
	    !(mode & BLK_OPEN_NDELAY))
		goto error_out;

	/*
	 * If the device has the write protect tab set, have the open fail
	 * if the user expects to be able to write to the thing.
	 */
	retval = -EROFS;
	if (sdkp->write_prot && (mode & BLK_OPEN_WRITE))
		goto error_out;

	/*
	 * It is possible that the disk changing stuff resulted in
	 * the device being taken offline.  If this is the case,
	 * report this to the user, and don't pretend that the
	 * open actually succeeded.
*/
	retval = -ENXIO;
	if (!scsi_device_online(sdev))
		goto error_out;

	/* First opener of a removable device: ask it to lock the medium. */
	if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) {
		if (scsi_block_when_processing_errors(sdev))
			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
	}

	return 0;

error_out:
	scsi_device_put(sdev);
	return retval;
}

/**
 *	sd_release - invoked when the (last) close(2) is called on this
 *	scsi disk.
 *	@disk: disk to release
 *
 *	Drops the opener count and, when it reaches zero on a removable
 *	device, allows medium removal again. Always releases the device
 *	reference taken by sd_open().
 *
 *	Note: may block (uninterruptible) if error recovery is underway
 *	on this disk.
 *
 *	Locking: called with disk->open_mutex held.
 **/
static void sd_release(struct gendisk *disk)
{
	struct scsi_disk *sdkp = scsi_disk(disk);
	struct scsi_device *sdev = sdkp->device;

	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));

	if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) {
		if (scsi_block_when_processing_errors(sdev))
			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
	}

	scsi_device_put(sdev);
}

/*
 * Report a disk geometry for @bdev in @geo.
 *
 * Starts from 64 heads, 32 sectors and capacity >> 11 cylinders, then lets
 * the host template's bios_param() hook, or scsicam_bios_param() when the
 * host has none, override those values. Always returns 0.
 */
static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
	struct scsi_device *sdp = sdkp->device;
	struct Scsi_Host *host = sdp->host;
	sector_t capacity = logical_to_sectors(sdp, sdkp->capacity);
	int diskinfo[4];

	/* default to most commonly used values */
	diskinfo[0] = 0x40;	/* 1 << 6 */
	diskinfo[1] = 0x20;	/* 1 << 5 */
	diskinfo[2] = capacity >> 11;

	/* override with calculated, extended default, or driver values */
	if (host->hostt->bios_param)
		host->hostt->bios_param(sdp, bdev, capacity, diskinfo);
	else
		scsicam_bios_param(bdev, capacity, diskinfo);

	geo->heads = diskinfo[0];
	geo->sectors = diskinfo[1];
	geo->cylinders = diskinfo[2];
	return 0;
}

/**
 *	sd_ioctl - process an ioctl
 *	@bdev: target block device
 *	@mode: open mode
 *	@cmd: ioctl command number
 *	@arg: this is third argument given to ioctl(2) system call.
 *	Often contains a pointer.
 *
 *	Returns 0 if successful (some ioctls return positive numbers on
 *	success as well). Returns a negated errno value in case of error.
*
 *	Note: most ioctls are forward onto the block subsystem or further
 *	down in the scsi subsystem.
 **/
static int sd_ioctl(struct block_device *bdev, blk_mode_t mode,
		    unsigned int cmd, unsigned long arg)
{
	struct gendisk *disk = bdev->bd_disk;
	struct scsi_disk *sdkp = scsi_disk(disk);
	struct scsi_device *sdp = sdkp->device;
	void __user *p = (void __user *)arg;
	int error;

	SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, "
				    "cmd=0x%x\n", disk->disk_name, cmd));

	/* On a partition, ioctls are refused without CAP_SYS_RAWIO. */
	if (bdev_is_partition(bdev) && !capable(CAP_SYS_RAWIO))
		return -ENOIOCTLCMD;

	/*
	 * If we are in the middle of error recovery, don't let anyone
	 * else try and use this device.  Also, if error recovery fails, it
	 * may try and take the device offline, in which case all further
	 * access to the device is prohibited.
	 */
	error = scsi_ioctl_block_when_processing_errors(sdp, cmd,
			(mode & BLK_OPEN_NDELAY));
	if (error)
		return error;

	/* SED ioctls go to the OPAL device, everything else to scsi_ioctl(). */
	if (is_sed_ioctl(cmd))
		return sed_ioctl(sdkp->opal_dev, cmd, p);
	return scsi_ioctl(sdp, mode & BLK_OPEN_WRITE, cmd, p);
}

/*
 * Record that no medium is present: flag the device as changed if a medium
 * was present before and, for removable devices, clear media_present and
 * the capacity.
 */
static void set_media_not_present(struct scsi_disk *sdkp)
{
	if (sdkp->media_present)
		sdkp->device->changed = 1;

	if (sdkp->device->removable) {
		sdkp->media_present = 0;
		sdkp->capacity = 0;
	}
}

/*
 * Inspect @sshdr for a "medium not present" condition (UNIT ATTENTION or
 * NOT READY with ASC 0x3A). If found, update @sdkp accordingly and return
 * 1; return 0 otherwise, including when the sense data is not valid.
 */
static int media_not_present(struct scsi_disk *sdkp,
			     struct scsi_sense_hdr *sshdr)
{
	if (!scsi_sense_valid(sshdr))
		return 0;

	/* not invoked for commands that could return deferred errors */
	switch (sshdr->sense_key) {
	case UNIT_ATTENTION:
	case NOT_READY:
		/* medium not present */
		if (sshdr->asc == 0x3A) {
			set_media_not_present(sdkp);
			return 1;
		}
	}
	return 0;
}

/**
 *	sd_check_events - check media events
 *	@disk: kernel device descriptor
 *	@clearing: disk events currently being cleared
 *
 *	Returns mask of DISK_EVENT_*.
 *
 *	Note: this function is invoked from the block subsystem.
**/
static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
{
	struct scsi_disk *sdkp = disk->private_data;
	struct scsi_device *sdp;
	int retval;
	bool disk_changed;

	if (!sdkp)
		return 0;

	sdp = sdkp->device;
	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_check_events\n"));

	/*
	 * If the device is offline, don't send any commands - just pretend as
	 * if the command failed.  If the device ever comes back online, we
	 * can deal with it then.  It is only because of unrecoverable errors
	 * that we would ever take a device offline in the first place.
	 */
	if (!scsi_device_online(sdp)) {
		set_media_not_present(sdkp);
		goto out;
	}

	/*
	 * Using TEST_UNIT_READY enables differentiation between drive with
	 * no cartridge loaded - NOT READY, drive with changed cartridge -
	 * UNIT ATTENTION, or with same cartridge - GOOD STATUS.
	 *
	 * Drives that auto spin down. eg iomega jaz 1G, will be started
	 * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever
	 * sd_revalidate() is called.
	 */
	if (scsi_block_when_processing_errors(sdp)) {
		struct scsi_sense_hdr sshdr = { 0, };

		retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, sdkp->max_retries,
					      &sshdr);

		/* failed to execute TUR, assume media not present */
		if (retval < 0 || host_byte(retval)) {
			set_media_not_present(sdkp);
			goto out;
		}

		if (media_not_present(sdkp, &sshdr))
			goto out;
	}

	/*
	 * For removable scsi disk we have to recognise the presence
	 * of a disk in the drive.
	 */
	if (!sdkp->media_present)
		sdp->changed = 1;
	sdkp->media_present = 1;
out:
	/*
	 * sdp->changed is set under the following conditions:
	 *
	 *	Medium present state has changed in either direction.
	 *	Device has indicated UNIT_ATTENTION.
	 */
	disk_changed = sdp->changed;
	sdp->changed = 0;
	return disk_changed ?
DISK_EVENT_MEDIA_CHANGE : 0; } static int sd_sync_cache(struct scsi_disk *sdkp) { int res; struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout * SD_FLUSH_TIMEOUT_MULTIPLIER; /* Leave the rest of the command zero to indicate flush everything. */ const unsigned char cmd[16] = { sdp->use_16_for_sync ? SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { .allowed = 3, .result = SCMD_FAILURE_RESULT_ANY, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, .sshdr = &sshdr, .failures = &failures, }; if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Synchronize Cache(10) failed", res); if (res < 0) return res; if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* we need to evaluate the error return */ if (sshdr.asc == 0x3a || /* medium not present */ sshdr.asc == 0x20 || /* invalid command */ (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */ /* this is no error here */ return 0; /* * If a format is in progress or if the drive does not * support sync, there is not much we can do because * this is called during shutdown or suspend so just * return success so those operations can proceed. 
*/ if ((sshdr.asc == 0x04 && sshdr.ascq == 0x04) || sshdr.sense_key == ILLEGAL_REQUEST) return 0; } switch (host_byte(res)) { /* ignore errors due to racing a disconnection */ case DID_BAD_TARGET: case DID_NO_CONNECT: return 0; /* signal the upper layer it might try again */ case DID_BUS_BUSY: case DID_IMM_RETRY: case DID_REQUEUE: case DID_SOFT_ERROR: return -EBUSY; default: return -EIO; } } return 0; } static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_revalidate_disk(sdkp->disk); } static int sd_get_unique_id(struct gendisk *disk, u8 id[16], enum blk_unique_id type) { struct scsi_device *sdev = scsi_disk(disk)->device; const struct scsi_vpd *vpd; const unsigned char *d; int ret = -ENXIO, len; rcu_read_lock(); vpd = rcu_dereference(sdev->vpd_pg83); if (!vpd) goto out_unlock; ret = -EINVAL; for (d = vpd->data + 4; d < vpd->data + vpd->len; d += d[3] + 4) { /* we only care about designators with LU association */ if (((d[1] >> 4) & 0x3) != 0x00) continue; if ((d[1] & 0xf) != type) continue; /* * Only exit early if a 16-byte descriptor was found. Otherwise * keep looking as one with more entropy might still show up. 
*/ len = d[3]; if (len != 8 && len != 12 && len != 16) continue; ret = len; memcpy(id, d + 4, len); if (len == 16) break; } out_unlock: rcu_read_unlock(); return ret; } static int sd_scsi_to_pr_err(struct scsi_sense_hdr *sshdr, int result) { switch (host_byte(result)) { case DID_TRANSPORT_MARGINAL: case DID_TRANSPORT_DISRUPTED: case DID_BUS_BUSY: return PR_STS_RETRY_PATH_FAILURE; case DID_NO_CONNECT: return PR_STS_PATH_FAILED; case DID_TRANSPORT_FAILFAST: return PR_STS_PATH_FAST_FAILED; } switch (status_byte(result)) { case SAM_STAT_RESERVATION_CONFLICT: return PR_STS_RESERVATION_CONFLICT; case SAM_STAT_CHECK_CONDITION: if (!scsi_sense_valid(sshdr)) return PR_STS_IOERR; if (sshdr->sense_key == ILLEGAL_REQUEST && (sshdr->asc == 0x26 || sshdr->asc == 0x24)) return -EINVAL; fallthrough; default: return PR_STS_IOERR; } } static int sd_pr_in_command(struct block_device *bdev, u8 sa, unsigned char *data, int data_len) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa }; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; put_unaligned_be16(data_len, &cmd[7]); result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, data, data_len, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { int result, i, data_offset, num_copy_keys; u32 num_keys = 
keys_info->num_keys; int data_len = num_keys * 8 + 8; u8 *data; data = kzalloc(data_len, GFP_KERNEL); if (!data) return -ENOMEM; result = sd_pr_in_command(bdev, READ_KEYS, data, data_len); if (result) goto free_data; keys_info->generation = get_unaligned_be32(&data[0]); keys_info->num_keys = get_unaligned_be32(&data[4]) / 8; data_offset = 8; num_copy_keys = min(num_keys, keys_info->num_keys); for (i = 0; i < num_copy_keys; i++) { keys_info->keys[i] = get_unaligned_be64(&data[data_offset]); data_offset += 8; } free_data: kfree(data); return result; } static int sd_pr_read_reservation(struct block_device *bdev, struct pr_held_reservation *rsv) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; u8 data[24] = { }; int result, len; result = sd_pr_in_command(bdev, READ_RESERVATION, data, sizeof(data)); if (result) return result; len = get_unaligned_be32(&data[4]); if (!len) return 0; /* Make sure we have at least the key and type */ if (len < 14) { sdev_printk(KERN_INFO, sdev, "READ RESERVATION failed due to short return buffer of %d bytes\n", len); return -EINVAL; } rsv->generation = get_unaligned_be32(&data[0]); rsv->key = get_unaligned_be64(&data[8]); rsv->type = scsi_pr_type_to_block(data[21] & 0x0f); return 0; } static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key, u64 sa_key, enum scsi_pr_type type, u8 flags) { struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk); struct scsi_device *sdev = sdkp->device; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .allowed = 5, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int result; u8 cmd[16] = { 0, }; u8 data[24] = { 0, }; cmd[0] = PERSISTENT_RESERVE_OUT; cmd[1] = sa; cmd[2] = type; 
put_unaligned_be32(sizeof(data), &cmd[5]); put_unaligned_be64(key, &data[0]); put_unaligned_be64(sa_key, &data[8]); data[20] = flags; result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, &data, sizeof(data), SD_TIMEOUT, sdkp->max_retries, &exec_args); if (scsi_status_is_check_condition(result) && scsi_sense_valid(&sshdr)) { sdev_printk(KERN_INFO, sdev, "PR command failed: %d\n", result); scsi_print_sense_hdr(sdev, NULL, &sshdr); } if (result <= 0) return result; return sd_scsi_to_pr_err(&sshdr, result); } static int sd_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags) { if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; return sd_pr_out_command(bdev, (flags & PR_FL_IGNORE_KEY) ? 0x06 : 0x00, old_key, new_key, 0, (1 << 0) /* APTPL */); } static int sd_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, u32 flags) { if (flags) return -EOPNOTSUPP; return sd_pr_out_command(bdev, 0x01, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { return sd_pr_out_command(bdev, 0x02, key, 0, block_pr_type_to_scsi(type), 0); } static int sd_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort) { return sd_pr_out_command(bdev, abort ? 
0x05 : 0x04, old_key, new_key, block_pr_type_to_scsi(type), 0); } static int sd_pr_clear(struct block_device *bdev, u64 key) { return sd_pr_out_command(bdev, 0x03, key, 0, 0, 0); } static const struct pr_ops sd_pr_ops = { .pr_register = sd_pr_register, .pr_reserve = sd_pr_reserve, .pr_release = sd_pr_release, .pr_preempt = sd_pr_preempt, .pr_clear = sd_pr_clear, .pr_read_keys = sd_pr_read_keys, .pr_read_reservation = sd_pr_read_reservation, }; static void scsi_disk_free_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); put_device(&sdkp->disk_dev); } static const struct block_device_operations sd_fops = { .owner = THIS_MODULE, .open = sd_open, .release = sd_release, .ioctl = sd_ioctl, .getgeo = sd_getgeo, .compat_ioctl = blkdev_compat_ptr_ioctl, .check_events = sd_check_events, .unlock_native_capacity = sd_unlock_native_capacity, .report_zones = sd_zbc_report_zones, .get_unique_id = sd_get_unique_id, .free_disk = scsi_disk_free_disk, .pr_ops = &sd_pr_ops, }; /** * sd_eh_reset - reset error handling callback * @scmd: sd-issued command that has failed * * This function is called by the SCSI midlayer before starting * SCSI EH. When counting medium access failures we have to be * careful to register it only only once per device and SCSI EH run; * there might be several timed out commands which will cause the * 'max_medium_access_timeouts' counter to trigger after the first * SCSI EH run already and set the device to offline. * So this function resets the internal counter before starting SCSI EH. 
**/ static void sd_eh_reset(struct scsi_cmnd *scmd) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); /* New SCSI EH run, reset gate variable */ sdkp->ignore_medium_access_errors = false; } /** * sd_eh_action - error handling callback * @scmd: sd-issued command that has failed * @eh_disp: The recovery disposition suggested by the midlayer * * This function is called by the SCSI midlayer upon completion of an * error test command (currently TEST UNIT READY). The result of sending * the eh command is passed in eh_disp. We're looking for devices that * fail medium access commands but are OK with non access commands like * test unit ready (so wrongly see the device as having a successful * recovery) **/ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp) { struct scsi_disk *sdkp = scsi_disk(scsi_cmd_to_rq(scmd)->q->disk); struct scsi_device *sdev = scmd->device; if (!scsi_device_online(sdev) || !scsi_medium_access_command(scmd) || host_byte(scmd->result) != DID_TIME_OUT || eh_disp != SUCCESS) return eh_disp; /* * The device has timed out executing a medium access command. * However, the TEST UNIT READY command sent during error * handling completed successfully. Either the device is in the * process of recovering or has it suffered an internal failure * that prevents access to the storage medium. */ if (!sdkp->ignore_medium_access_errors) { sdkp->medium_access_timed_out++; sdkp->ignore_medium_access_errors = true; } /* * If the device keeps failing read/write commands but TEST UNIT * READY always completes successfully we assume that medium * access is no longer possible and take the device offline. */ if (sdkp->medium_access_timed_out >= sdkp->max_medium_access_timeouts) { scmd_printk(KERN_ERR, scmd, "Medium access timeout failure. 
Offlining disk!\n"); mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_OFFLINE); mutex_unlock(&sdev->state_mutex); return SUCCESS; } return eh_disp; } static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; unsigned int transferred, good_bytes; u64 start_lba, end_lba, bad_lba; /* * Some commands have a payload smaller than the device logical * block size (e.g. INQUIRY on a 4K disk). */ if (scsi_bufflen(scmd) <= sdev->sector_size) return 0; /* Check if we have a 'bad_lba' information */ if (!scsi_get_sense_info_fld(scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &bad_lba)) return 0; /* * If the bad lba was reported incorrectly, we have no idea where * the error is. */ start_lba = sectors_to_logical(sdev, blk_rq_pos(req)); end_lba = start_lba + bytes_to_logical(sdev, scsi_bufflen(scmd)); if (bad_lba < start_lba || bad_lba >= end_lba) return 0; /* * resid is optional but mostly filled in. When it's unused, * its value is zero, so we assume the whole buffer transferred */ transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd); /* This computation should always be done in terms of the * resolution of the device's medium. */ good_bytes = logical_to_bytes(sdev, bad_lba - start_lba); return min(good_bytes, transferred); } /** * sd_done - bottom half handler: called when the lower level * driver has completed (successfully or otherwise) a scsi command. * @SCpnt: mid-level's per command structure. * * Note: potentially run from within an ISR. Must not block. **/ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 
0 : scsi_bufflen(SCpnt); unsigned int sector_size = SCpnt->device->sector_size; unsigned int resid; struct scsi_sense_hdr sshdr; struct request *req = scsi_cmd_to_rq(SCpnt); struct scsi_disk *sdkp = scsi_disk(req->q->disk); int sense_valid = 0; int sense_deferred = 0; switch (req_op(req)) { case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: if (!result) { good_bytes = blk_rq_bytes(req); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0; scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; default: /* * In case of bogus fw or device, we could end up having * an unaligned partial completion. Check this here and force * alignment. */ resid = scsi_get_resid(SCpnt); if (resid & (sector_size - 1)) { sd_printk(KERN_INFO, sdkp, "Unaligned partial completion (resid=%u, sector_sz=%u)\n", resid, sector_size); scsi_print_command(SCpnt); resid = min(scsi_bufflen(SCpnt), round_up(resid, sector_size)); scsi_set_resid(SCpnt, resid); } } if (result) { sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); if (sense_valid) sense_deferred = scsi_sense_is_deferred(&sshdr); } sdkp->medium_access_timed_out = 0; if (!scsi_status_is_check_condition(result) && (!sense_valid || sense_deferred)) goto out; switch (sshdr.sense_key) { case HARDWARE_ERROR: case MEDIUM_ERROR: good_bytes = sd_completed_bytes(SCpnt); break; case RECOVERED_ERROR: good_bytes = scsi_bufflen(SCpnt); break; case NO_SENSE: /* This indicates a false check condition, so ignore it. An * unknown amount of data was transferred so treat it as an * error. 
*/ SCpnt->result = 0; memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); break; case ABORTED_COMMAND: if (sshdr.asc == 0x10) /* DIF: Target detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case ILLEGAL_REQUEST: switch (sshdr.asc) { case 0x10: /* DIX: Host detected corruption */ good_bytes = sd_completed_bytes(SCpnt); break; case 0x20: /* INVALID COMMAND OPCODE */ case 0x24: /* INVALID FIELD IN CDB */ switch (SCpnt->cmnd[0]) { case UNMAP: sd_disable_discard(sdkp); break; case WRITE_SAME_16: case WRITE_SAME: if (SCpnt->cmnd[1] & 8) { /* UNMAP */ sd_disable_discard(sdkp); } else { sd_disable_write_same(sdkp); req->rq_flags |= RQF_QUIET; } break; } } break; default: break; } out: if (sdkp->device->type == TYPE_ZBC) good_bytes = sd_zbc_complete(SCpnt, good_bytes, &sshdr); SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt, "sd_done: completed %d of %d bytes\n", good_bytes, scsi_bufflen(SCpnt))); return good_bytes; } /* * spinup disk - called only in sd_revalidate_disk() */ static void sd_spinup_disk(struct scsi_disk *sdkp) { static const u8 cmd[10] = { TEST_UNIT_READY }; unsigned long spintime_expire = 0; int spintime, sense_valid = 0; unsigned int the_result; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Retry when scsi_status_is_good would return false 3 times */ { .result = SCMD_FAILURE_STAT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; spintime = 0; /* Spin up drives, as required. Only do this at boot time */ /* Spinup needs to be done for module loads too. 
*/
	do {
		bool media_was_present = sdkp->media_present;

		scsi_failures_reset_retries(&failures);

		the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN,
					      NULL, 0, SD_TIMEOUT,
					      sdkp->max_retries, &exec_args);

		if (the_result > 0) {
			/*
			 * If the drive has indicated to us that it doesn't
			 * have any media in it, don't bother with any more
			 * polling.
			 */
			if (media_not_present(sdkp, &sshdr)) {
				if (media_was_present)
					sd_printk(KERN_NOTICE, sdkp,
						  "Media removed, stopped polling\n");
				return;
			}
			sense_valid = scsi_sense_valid(&sshdr);
		}

		if (!scsi_status_is_check_condition(the_result)) {
			/* no sense, TUR either succeeded or failed
			 * with a status error */
			if(!spintime && !scsi_status_is_good(the_result)) {
				sd_print_result(sdkp, "Test Unit Ready failed",
						the_result);
			}
			break;
		}

		/*
		 * The device does not want the automatic start to be issued.
		 */
		if (sdkp->device->no_start_on_add)
			break;

		if (sense_valid && sshdr.sense_key == NOT_READY) {
			if (sshdr.asc == 4 && sshdr.ascq == 3)
				break;	/* manual intervention required */
			if (sshdr.asc == 4 && sshdr.ascq == 0xb)
				break;	/* standby */
			if (sshdr.asc == 4 && sshdr.ascq == 0xc)
				break;	/* unavailable */
			if (sshdr.asc == 4 && sshdr.ascq == 0x1b)
				break;	/* sanitize in progress */
			if (sshdr.asc == 4 && sshdr.ascq == 0x24)
				break;	/* depopulation in progress */
			if (sshdr.asc == 4 && sshdr.ascq == 0x25)
				break;	/* depopulation restoration in progress */
			/*
			 * Issue command to spin up drive when not ready
			 */
			if (!spintime) {
				/* Return immediately and start spin cycle */
				const u8 start_cmd[10] = {
					[0] = START_STOP,
					[1] = 1,
					[4] = sdkp->device->start_stop_pwr_cond ?
						0x11 : 1,
				};

				sd_printk(KERN_NOTICE, sdkp, "Spinning up disk...");
				scsi_execute_cmd(sdkp->device, start_cmd,
						 REQ_OP_DRV_IN, NULL, 0,
						 SD_TIMEOUT, sdkp->max_retries,
						 &exec_args);
				spintime_expire = jiffies + 100 * HZ;
				spintime = 1;
			}
			/* Wait 1 second for next try */
			msleep(1000);
			printk(KERN_CONT ".");

		/*
		 * Wait for USB flash devices with slow firmware.
* Yes, this sense key/ASC combination shouldn't * occur here. It's characteristic of these devices. */ } else if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x28) { if (!spintime) { spintime_expire = jiffies + 5 * HZ; spintime = 1; } /* Wait 1 second for next try */ msleep(1000); } else { /* we don't understand the sense code, so it's * probably pointless to loop */ if(!spintime) { sd_printk(KERN_NOTICE, sdkp, "Unit Not Ready\n"); sd_print_sense_hdr(sdkp, &sshdr); } break; } } while (spintime && time_before_eq(jiffies, spintime_expire)); if (spintime) { if (scsi_status_is_good(the_result)) printk(KERN_CONT "ready\n"); else printk(KERN_CONT "not responding...\n"); } } /* * Determine whether disk supports Data Integrity Field. */ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer) { struct scsi_device *sdp = sdkp->device; u8 type; if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) { sdkp->protection_type = 0; return 0; } type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ if (type > T10_PI_TYPE3_PROTECTION) { sd_printk(KERN_ERR, sdkp, "formatted with unsupported" \ " protection type %u. 
Disabling disk!\n", type); sdkp->protection_type = 0; return -ENODEV; } sdkp->protection_type = type; return 0; } static void sd_config_protection(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) sd_dif_config_host(sdkp, lim); if (!sdkp->protection_type) return; if (!scsi_host_dif_capable(sdp->host, sdkp->protection_type)) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling DIF Type %u protection\n", sdkp->protection_type); sdkp->protection_type = 0; } sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIF Type %u protection\n", sdkp->protection_type); } static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, struct scsi_sense_hdr *sshdr, int sense_valid, int the_result) { if (sense_valid) sd_print_sense_hdr(sdkp, sshdr); else sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n"); /* * Set dirty bit for removable devices if not ready - * sometimes drives will not report this properly. */ if (sdp->removable && sense_valid && sshdr->sense_key == NOT_READY) set_media_not_present(sdkp); /* * We used to set media_present to 0 here to indicate no media * in the drive, but some drives fail read capacity even with * media present, so we can't do that. 
*/ sdkp->capacity = 0; /* unknown mapped to zero - as usual */ } #define RC16_LEN 32 #if RC16_LEN > SD_BUF_SIZE #error RC16_LEN must not be more than SD_BUF_SIZE #endif #define READ_CAPACITY_RETRIES_ON_RESET 10 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, struct queue_limits *lim, unsigned char *buffer) { unsigned char cmd[16]; struct scsi_sense_hdr sshdr; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, }; int sense_valid = 0; int the_result; int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; unsigned int alignment; unsigned long long lba; unsigned sector_size; if (sdp->no_read_capacity_16) return -EINVAL; do { memset(cmd, 0, 16); cmd[0] = SERVICE_ACTION_IN_16; cmd[1] = SAI_READ_CAPACITY_16; cmd[13] = RC16_LEN; memset(buffer, 0, RC16_LEN); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, RC16_LEN, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { if (media_not_present(sdkp, &sshdr)) return -ENODEV; sense_valid = scsi_sense_valid(&sshdr); if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) /* Invalid Command Operation Code or * Invalid Field in CDB, just retry * silently with RC10 */ return -EINVAL; if (sense_valid && sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29 && sshdr.ascq == 0x00) /* Device reset might occur several times, * give it one more chance */ if (--reset_retries > 0) continue; } retries--; } while (the_result && retries); if (the_result) { sd_print_result(sdkp, "Read Capacity(16) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[8]); lba = get_unaligned_be64(&buffer[0]); if (sd_read_protection_type(sdkp, buffer) < 0) { sdkp->capacity = 0; return -ENODEV; } /* Logical blocks per physical block exponent */ sdkp->physical_block_size = (1 << (buffer[13] & 0xf)) * sector_size; /* RC basis */ 
sdkp->rc_basis = (buffer[12] >> 4) & 0x3; /* Lowest aligned logical block */ alignment = ((buffer[14] & 0x3f) << 8 | buffer[15]) * sector_size; lim->alignment_offset = alignment; if (alignment && sdkp->first_scan) sd_printk(KERN_NOTICE, sdkp, "physical block alignment offset: %u\n", alignment); if (buffer[14] & 0x80) { /* LBPME */ sdkp->lbpme = 1; if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; } sdkp->capacity = lba + 1; return sector_size; } static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { static const u8 cmd[10] = { READ_CAPACITY }; struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { /* Do not retry Medium Not Present */ { .sense = UNIT_ATTENTION, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = NOT_READY, .asc = 0x3A, .result = SAM_STAT_CHECK_CONDITION, }, /* Device reset might occur several times so retry a lot */ { .sense = UNIT_ATTENTION, .asc = 0x29, .allowed = READ_CAPACITY_RETRIES_ON_RESET, .result = SAM_STAT_CHECK_CONDITION, }, /* Any other error not listed above retry 3 times */ { .result = SCMD_FAILURE_RESULT_ANY, .allowed = 3, }, {} }; struct scsi_failures failures = { .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .failures = &failures, }; int sense_valid = 0; int the_result; sector_t lba; unsigned sector_size; memset(buffer, 0, 8); the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, 8, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (the_result > 0) { sense_valid = scsi_sense_valid(&sshdr); if (media_not_present(sdkp, &sshdr)) return -ENODEV; } if (the_result) { sd_print_result(sdkp, "Read Capacity(10) failed", the_result); read_capacity_error(sdkp, sdp, &sshdr, sense_valid, the_result); return -EINVAL; } sector_size = get_unaligned_be32(&buffer[4]); lba = get_unaligned_be32(&buffer[0]); if (sdp->no_read_capacity_16 && (lba == 0xffffffff)) { /* Some buggy (usb cardreader) devices return an lba of 
0xffffffff when the want to report a size of 0 (with which they really mean no media is present) */ sdkp->capacity = 0; sdkp->physical_block_size = sector_size; return sector_size; } sdkp->capacity = lba + 1; sdkp->physical_block_size = sector_size; return sector_size; } static int sd_try_rc16_first(struct scsi_device *sdp) { if (sdp->host->max_cmd_len < 16) return 0; if (sdp->try_rc_10_first) return 0; if (sdp->scsi_level > SCSI_SPC_2) return 1; if (scsi_device_protection(sdp)) return 1; return 0; } /* * read disk capacity */ static void sd_read_capacity(struct scsi_disk *sdkp, struct queue_limits *lim, unsigned char *buffer) { int sector_size; struct scsi_device *sdp = sdkp->device; if (sd_try_rc16_first(sdp)) { sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size == -ENODEV) return; if (sector_size < 0) sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size < 0) return; } else { sector_size = read_capacity_10(sdkp, sdp, buffer); if (sector_size == -EOVERFLOW) goto got_data; if (sector_size < 0) return; if ((sizeof(sdkp->capacity) > 4) && (sdkp->capacity > 0xffffffffULL)) { int old_sector_size = sector_size; sd_printk(KERN_NOTICE, sdkp, "Very big device. " "Trying to use READ CAPACITY(16).\n"); sector_size = read_capacity_16(sdkp, sdp, lim, buffer); if (sector_size < 0) { sd_printk(KERN_NOTICE, sdkp, "Using 0xffffffff as device size\n"); sdkp->capacity = 1 + (sector_t) 0xffffffff; sector_size = old_sector_size; goto got_data; } /* Remember that READ CAPACITY(16) succeeded */ sdp->try_rc_10_first = 0; } } /* Some devices are known to return the total number of blocks, * not the highest block number. Some devices have versions * which do this and others which do not. Some devices we might * suspect of doing this but we don't know for certain. * * If we know the reported capacity is wrong, decrement it. 
If
	 * we can only guess, then assume the number of blocks is even
	 * (usually true but not always) and err on the side of lowering
	 * the capacity.
	 */
	if (sdp->fix_capacity ||
	    (sdp->guess_capacity && (sdkp->capacity & 0x01))) {
		sd_printk(KERN_INFO, sdkp, "Adjusting the sector count "
				"from its reported value: %llu\n",
				(unsigned long long) sdkp->capacity);
		--sdkp->capacity;
	}

got_data:
	if (sector_size == 0) {
		sector_size = 512;
		sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, "
			  "assuming 512.\n");
	}

	if (sector_size != 512 &&
	    sector_size != 1024 &&
	    sector_size != 2048 &&
	    sector_size != 4096) {
		sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
			  sector_size);
		/*
		 * The user might want to re-format the drive with
		 * a supported sectorsize.  Once this happens, it
		 * would be relatively trivial to set the thing up.
		 * For this reason, we leave the thing in the table.
		 */
		sdkp->capacity = 0;
		/*
		 * set a bogus sector size so the normal read/write
		 * logic in the block layer will eventually refuse any
		 * request on this device without tripping over power
		 * of two sector size assumptions
		 */
		sector_size = 512;
	}
	lim->logical_block_size = sector_size;
	lim->physical_block_size = sdkp->physical_block_size;
	sdkp->device->sector_size = sector_size;

	/* Capacities that do not fit in 32 bits need 16-byte READ/WRITE CDBs. */
	if (sdkp->capacity > 0xffffffff)
		sdp->use_16_for_rw = 1;

}

/*
 * Print disk capacity
 *
 * Nothing is printed when this is not the first scan and the capacity
 * equals @old_capacity.  The capacity is reported both in binary
 * (STRING_UNITS_2) and decimal (STRING_UNITS_10) units, followed by the
 * physical block size when it differs from the logical sector size.
 */
static void
sd_print_capacity(struct scsi_disk *sdkp,
		  sector_t old_capacity)
{
	int sector_size = sdkp->device->sector_size;
	char cap_str_2[10], cap_str_10[10];

	if (!sdkp->first_scan && old_capacity == sdkp->capacity)
		return;

	string_get_size(sdkp->capacity, sector_size,
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(sdkp->capacity, sector_size,
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	sd_printk(KERN_NOTICE, sdkp,
		  "%llu %d-byte logical blocks: (%s/%s)\n",
		  (unsigned long long)sdkp->capacity,
		  sector_size, cap_str_10, cap_str_2);

	if (sdkp->physical_block_size != sector_size)
		sd_printk(KERN_NOTICE, sdkp,
			  "%u-byte physical blocks\n",
			  sdkp->physical_block_size);
}

/*
 * Thin wrapper around scsi_mode_sense() (subpage 0, SD_TIMEOUT,
 * sdkp->max_retries) that bumps @len to 8 when MODE SENSE(10) is in use.
 *
 * called with buffer of length 512
 */
static inline int
sd_do_mode_sense(struct scsi_disk *sdkp, int dbd, int modepage,
		 unsigned char *buffer, int len, struct scsi_mode_data *data,
		 struct scsi_sense_hdr *sshdr)
{
	/*
	 * If we must use MODE SENSE(10), make sure that the buffer length
	 * is at least 8 bytes so that the mode sense header fits.
	 */
	if (sdkp->device->use_10_for_ms && len < 8)
		len = 8;

	return scsi_mode_sense(sdkp->device, dbd, modepage, 0, buffer, len,
			       SD_TIMEOUT, sdkp->max_retries, data, sshdr);
}

/*
 * read write protect setting, if possible - called only in sd_revalidate_disk()
 * called with buffer of length SD_BUF_SIZE
 *
 * The disk is first marked read-write; it is marked read-only afterwards
 * only if a MODE SENSE succeeds and bit 0x80 of the device-specific
 * parameter is set.
 */
static void
sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer)
{
	int res;
	struct scsi_device *sdp = sdkp->device;
	struct scsi_mode_data data;
	int old_wp = sdkp->write_prot;

	set_disk_ro(sdkp->disk, 0);
	if (sdp->skip_ms_page_3f) {
		sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n");
		return;
	}

	if (sdp->use_192_bytes_for_3f) {
		res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 192, &data, NULL);
	} else {
		/*
		 * First attempt: ask for all pages (0x3F), but only 4 bytes.
		 * We have to start carefully: some devices hang if we ask
		 * for more than is available.
		 */
		res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 4, &data, NULL);

		/*
		 * Second attempt: ask for page 0 When only page 0 is
		 * implemented, a request for page 3F may return Sense Key
		 * 5: Illegal Request, Sense Code 24: Invalid field in
		 * CDB.
		 */
		if (res < 0)
			res = sd_do_mode_sense(sdkp, 0, 0, buffer, 4, &data, NULL);

		/*
		 * Third attempt: ask 255 bytes, as we did earlier.
		 */
		if (res < 0)
			res = sd_do_mode_sense(sdkp, 0, 0x3F, buffer, 255,
					       &data, NULL);
	}

	if (res < 0) {
		sd_first_printk(KERN_WARNING, sdkp,
			  "Test WP failed, assume Write Enabled\n");
	} else {
		sdkp->write_prot = ((data.device_specific & 0x80) != 0);
		set_disk_ro(sdkp->disk, sdkp->write_prot);
		/* Only log on the first scan or when the setting changed. */
		if (sdkp->first_scan || old_wp != sdkp->write_prot) {
			sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n",
				  sdkp->write_prot ? "on" : "off");
			sd_printk(KERN_DEBUG, sdkp, "Mode Sense: %4ph\n", buffer);
		}
	}

}

/*
 * sd_read_cache_type - called only from sd_revalidate_disk()
 * called with buffer of length SD_BUF_SIZE
 *
 * Sets sdkp->WCE, sdkp->RCD and sdkp->DPOFUA from the mode page found in
 * the MODE SENSE data (page code 8 or 6), or falls back to defaults
 * derived from sdp->wce_default_on when the page cannot be obtained.
 * Does nothing when sdkp->cache_override is set.
 */
static void
sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
{
	int len = 0, res;
	struct scsi_device *sdp = sdkp->device;

	int dbd;
	int modepage;
	int first_len;
	struct scsi_mode_data data;
	struct scsi_sense_hdr sshdr;
	int old_wce = sdkp->WCE;
	int old_rcd = sdkp->RCD;
	int old_dpofua = sdkp->DPOFUA;


	if (sdkp->cache_override)
		return;

	/* Pick the mode page, DBD value and initial length for the first ask. */
	first_len = 4;
	if (sdp->skip_ms_page_8) {
		if (sdp->type == TYPE_RBC)
			goto defaults;
		else {
			if (sdp->skip_ms_page_3f)
				goto defaults;
			modepage = 0x3F;
			if (sdp->use_192_bytes_for_3f)
				first_len = 192;
			dbd = 0;
		}
	} else if (sdp->type == TYPE_RBC) {
		modepage = 6;
		dbd = 8;
	} else {
		modepage = 8;
		dbd = 0;
	}

	/* cautiously ask */
	res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, first_len,
			&data, &sshdr);

	if (res < 0)
		goto bad_sense;

	if (!data.header_length) {
		modepage = 6;
		first_len = 0;
		sd_first_printk(KERN_ERR, sdkp,
				"Missing header in MODE_SENSE response\n");
	}

	/* that went OK, now ask for the proper length */
	len = data.length;

	/*
	 * We're only interested in the first three bytes, actually.
	 * But the data cache page is defined for the first 20.
	 */
	if (len < 3)
		goto bad_sense;
	else if (len > SD_BUF_SIZE) {
		sd_first_printk(KERN_NOTICE, sdkp, "Truncating mode parameter "
			  "data from %d to %d bytes\n", len, SD_BUF_SIZE);
		len = SD_BUF_SIZE;
	}
	if (modepage == 0x3F && sdp->use_192_bytes_for_3f)
		len = 192;

	/* Get the data */
	if (len > first_len)
		res = sd_do_mode_sense(sdkp, dbd, modepage, buffer, len,
				&data, &sshdr);

	if (!res) {
		int offset = data.header_length + data.block_descriptor_length;

		/* Walk the returned pages until page code 8 or 6 is found. */
		while (offset < len) {
			u8 page_code = buffer[offset] & 0x3F;
			u8 spf       = buffer[offset] & 0x40;

			if (page_code == 8 || page_code == 6) {
				/* We're interested only in the first 3 bytes.
				 */
				if (len - offset <= 2) {
					sd_first_printk(KERN_ERR, sdkp,
						"Incomplete mode parameter "
							"data\n");
					goto defaults;
				} else {
					modepage = page_code;
					goto Page_found;
				}
			} else {
				/* Go to the next page */
				if (spf && len - offset > 3)
					offset += 4 + (buffer[offset+2] << 8) +
						buffer[offset+3];
				else if (!spf && len - offset > 1)
					offset += 2 + buffer[offset+1];
				else {
					sd_first_printk(KERN_ERR, sdkp,
							"Incomplete mode "
							"parameter data\n");
					goto defaults;
				}
			}
		}

		sd_first_printk(KERN_WARNING, sdkp,
				"No Caching mode page found\n");
		goto defaults;

	Page_found:
		if (modepage == 8) {
			sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
			sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
		} else {
			sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0);
			sdkp->RCD = 0;
		}

		sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
		if (sdp->broken_fua) {
			sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n");
			sdkp->DPOFUA = 0;
		} else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw &&
			   !sdkp->device->use_16_for_rw) {
			sd_first_printk(KERN_NOTICE, sdkp,
				  "Uses READ/WRITE(6), disabling FUA\n");
			sdkp->DPOFUA = 0;
		}

		/* No cache flush allowed for write protected devices */
		if (sdkp->WCE && sdkp->write_prot)
			sdkp->WCE = 0;

		if (sdkp->first_scan || old_wce != sdkp->WCE ||
		    old_rcd != sdkp->RCD || old_dpofua != sdkp->DPOFUA)
			sd_printk(KERN_NOTICE, sdkp,
				  "Write cache: %s, read cache: %s, %s\n",
				  sdkp->WCE ? "enabled" : "disabled",
				  sdkp->RCD ? "disabled" : "enabled",
				  sdkp->DPOFUA ? "supports DPO and FUA"
				  : "doesn't support DPO or FUA");

		return;
	}

bad_sense:
	if (res == -EIO && scsi_sense_valid(&sshdr) &&
	    sshdr.sense_key == ILLEGAL_REQUEST &&
	    sshdr.asc == 0x24 && sshdr.ascq == 0x0)
		/* Invalid field in CDB */
		sd_first_printk(KERN_NOTICE, sdkp, "Cache data unavailable\n");
	else
		sd_first_printk(KERN_ERR, sdkp,
				"Asking for cache data failed\n");

defaults:
	if (sdp->wce_default_on) {
		sd_first_printk(KERN_NOTICE, sdkp,
				"Assuming drive cache: write back\n");
		sdkp->WCE = 1;
	} else {
		sd_first_printk(KERN_WARNING, sdkp,
				"Assuming drive cache: write through\n");
		sdkp->WCE = 0;
	}
	sdkp->RCD = 0;
	sdkp->DPOFUA = 0;
}

/*
 * Issue SERVICE ACTION IN(16) / GET STREAM STATUS for @stream_id and
 * return the 'perm' field of the returned stream status.  Returns false
 * when the command fails or when the reported length is smaller than one
 * struct scsi_stream_status.
 */
static bool sd_is_perm_stream(struct scsi_disk *sdkp, unsigned int stream_id)
{
	u8 cdb[16] = { SERVICE_ACTION_IN_16, SAI_GET_STREAM_STATUS };
	struct {
		struct scsi_stream_status_header h;
		struct scsi_stream_status s;
	} buf;
	struct scsi_device *sdev = sdkp->device;
	struct scsi_sense_hdr sshdr;
	const struct scsi_exec_args exec_args = {
		.sshdr = &sshdr,
	};
	int res;

	/* Stream id goes in CDB bytes 4-5, allocation length in bytes 10-13. */
	put_unaligned_be16(stream_id, &cdb[4]);
	put_unaligned_be32(sizeof(buf), &cdb[10]);

	res = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, &buf, sizeof(buf),
			       SD_TIMEOUT, sdkp->max_retries, &exec_args);
	if (res < 0)
		return false;
	if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr))
		sd_print_sense_hdr(sdkp, &sshdr);
	if (res)
		return false;
	if (get_unaligned_be32(&buf.h.len) < sizeof(struct scsi_stream_status))
		return false;
	return buf.s.perm;
}

/*
 * Read mode page 0x0a subpage 0x05 and count the leading I/O group
 * descriptors that have st_enble set and for which sd_is_perm_stream()
 * returns true.  The count is stored in sdkp->permanent_stream_count.
 * Skipped entirely when BLIST_SKIP_IO_HINTS is set for the device.
 */
static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
{
	struct scsi_device *sdp = sdkp->device;
	const struct scsi_io_group_descriptor *desc, *start, *end;
	u16 permanent_stream_count_old;
	struct scsi_sense_hdr sshdr;
	struct scsi_mode_data data;
	int res;

	if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS)
		return;

	res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
			      /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT,
			      sdkp->max_retries, &data, &sshdr);
	if (res < 0)
		return;
	start = (void *)buffer + data.header_length + 16;
	end = (void *)buffer + ALIGN_DOWN(data.header_length + data.length,
					  sizeof(*end));
	/*
	 * From "SBC-5 Constrained Streams with Data Lifetimes": Device servers
	 * should assign the lowest numbered stream identifiers to permanent
	 * streams.
	 */
	for (desc = start; desc < end; desc++)
		if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start))
			break;
	permanent_stream_count_old = sdkp->permanent_stream_count;
	sdkp->permanent_stream_count = desc - start;
	if (sdkp->rscs && sdkp->permanent_stream_count < 2)
		sd_printk(KERN_INFO, sdkp,
			  "Unexpected: RSCS has been set and the permanent stream count is %u\n",
			  sdkp->permanent_stream_count);
	else if (sdkp->permanent_stream_count != permanent_stream_count_old)
		sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
			  sdkp->permanent_stream_count);
}

/*
 * The ATO bit indicates whether the DIF application tag is available
 * for use by the operating system.
 *
 * Only queried for TYPE_DISK/TYPE_ZBC devices with a non-zero protection
 * type; sets sdkp->ATO to 1 when bit 0x80 of byte 5 of mode page 0x0a is
 * set.
 */
static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer)
{
	int res, offset;
	struct scsi_device *sdp = sdkp->device;
	struct scsi_mode_data data;
	struct scsi_sense_hdr sshdr;

	if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC)
		return;

	if (sdkp->protection_type == 0)
		return;

	res = scsi_mode_sense(sdp, 1, 0x0a, 0, buffer, 36, SD_TIMEOUT,
			      sdkp->max_retries, &data, &sshdr);

	if (res < 0 || !data.header_length ||
	    data.length < 6) {
		sd_first_printk(KERN_WARNING, sdkp,
			  "getting Control mode page failed, assume no ATO\n");

		if (res == -EIO && scsi_sense_valid(&sshdr))
			sd_print_sense_hdr(sdkp, &sshdr);

		return;
	}

	offset = data.header_length + data.block_descriptor_length;

	if ((buffer[offset] & 0x3f) != 0x0a) {
		sd_first_printk(KERN_ERR, sdkp, "ATO Got wrong page\n");
		return;
	}

	if ((buffer[offset + 5] & 0x80) == 0)
		return;

	sdkp->ATO = 1;

	return;
}

/*
 * Choose the SD_LBP_* mode from the lbpme/lbpvpd/lbpu/lbpws/lbpws10
 * flags and max_unmap_blocks previously stored in @sdkp.
 */
static unsigned int sd_discard_mode(struct scsi_disk *sdkp)
{
	if (!sdkp->lbpme)
		return SD_LBP_FULL;

	if (!sdkp->lbpvpd) {
		/* LBP VPD page not provided */
		if (sdkp->max_unmap_blocks)
			return SD_LBP_UNMAP;
		return SD_LBP_WS16;
	}

	/* LBP VPD page tells us what to use */
	if (sdkp->lbpu && sdkp->max_unmap_blocks)
		return SD_LBP_UNMAP;
	if (sdkp->lbpws)
		return SD_LBP_WS16;
	if (sdkp->lbpws10)
		return SD_LBP_WS10;
	return SD_LBP_DISABLE;
}

/*
 * Query disk device for preferred I/O sizes.
 *
 * Parses the cached VPD page sdkp->device->vpd_pgb0 under the RCU read
 * lock.  The unmap and atomic-write fields are read only when the page
 * is at least 64 bytes long; the unmap fields are additionally skipped
 * when sdkp->lbpme is clear.
 */
static void sd_read_block_limits(struct scsi_disk *sdkp,
				 struct queue_limits *lim)
{
	struct scsi_vpd *vpd;

	rcu_read_lock();

	vpd = rcu_dereference(sdkp->device->vpd_pgb0);
	if (!vpd || vpd->len < 16)
		goto out;

	sdkp->min_xfer_blocks = get_unaligned_be16(&vpd->data[6]);
	sdkp->max_xfer_blocks = get_unaligned_be32(&vpd->data[8]);
	sdkp->opt_xfer_blocks = get_unaligned_be32(&vpd->data[12]);

	if (vpd->len >= 64) {
		unsigned int lba_count, desc_count;

		sdkp->max_ws_blocks = (u32)get_unaligned_be64(&vpd->data[36]);

		if (!sdkp->lbpme)
			goto config_atomic;

		lba_count = get_unaligned_be32(&vpd->data[20]);
		desc_count = get_unaligned_be32(&vpd->data[24]);

		if (lba_count && desc_count)
			sdkp->max_unmap_blocks = lba_count;

		sdkp->unmap_granularity = get_unaligned_be32(&vpd->data[28]);

		/* Top bit of byte 32 gates the alignment; it is masked off. */
		if (vpd->data[32] & 0x80)
			sdkp->unmap_alignment =
				get_unaligned_be32(&vpd->data[32]) & ~(1 << 31);

config_atomic:
		sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]);
		sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]);
		sdkp->atomic_granularity = get_unaligned_be32(&vpd->data[52]);
		sdkp->max_atomic_with_boundary = get_unaligned_be32(&vpd->data[56]);
		sdkp->max_atomic_boundary = get_unaligned_be32(&vpd->data[60]);

		sd_config_atomic(sdkp, lim);
	}

 out:
	rcu_read_unlock();
}

/* Parse the Block Limits Extension VPD page (0xb7) */
static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
{
	struct scsi_vpd *vpd;

	rcu_read_lock();
	vpd = rcu_dereference(sdkp->device->vpd_pgb7);
	/* sdkp->rscs is bit 0 of byte 5; left untouched if the page is short. */
	if (vpd && vpd->len >= 6)
		sdkp->rscs = vpd->data[5] & 1;
	rcu_read_unlock();
}

/*
 * Query block device characteristics
 *
 * Reads the cached VPD page vpd_pgb1: a rotation value of 1 clears the
 * rotational and add-random features in @lim, and bits 4-5 of byte 8 are
 * stored in sdkp->zoned.  The zoned model is logged on the first scan only.
 */
static void sd_read_block_characteristics(struct scsi_disk *sdkp,
		struct queue_limits *lim)
{
	struct scsi_vpd *vpd;
	u16 rot;

	rcu_read_lock();
	vpd = rcu_dereference(sdkp->device->vpd_pgb1);

	if (!vpd || vpd->len <= 8) {
		rcu_read_unlock();
	        return;
	}

	rot = get_unaligned_be16(&vpd->data[4]);
	sdkp->zoned = (vpd->data[8] >> 4) & 3;
	rcu_read_unlock();

	if (rot == 1)
		lim->features &= ~(BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM);

	if (!sdkp->first_scan)
		return;

	if (sdkp->device->type == TYPE_ZBC)
		sd_printk(KERN_NOTICE, sdkp, "Host-managed zoned block device\n");
	else if (sdkp->zoned == 1)
		sd_printk(KERN_NOTICE, sdkp, "Host-aware SMR disk used as regular disk\n");
	else if (sdkp->zoned == 2)
		sd_printk(KERN_NOTICE, sdkp, "Drive-managed SMR disk\n");
}

/**
 * sd_read_block_provisioning - Query provisioning VPD page
 * @sdkp: disk to query
 *
 * Does nothing when sdkp->lbpme is zero or the cached page vpd_pgb2 is
 * missing or shorter than 8 bytes.
 */
static void sd_read_block_provisioning(struct scsi_disk *sdkp)
{
	struct scsi_vpd *vpd;

	if (sdkp->lbpme == 0)
		return;

	rcu_read_lock();
	vpd = rcu_dereference(sdkp->device->vpd_pgb2);

	if (!vpd || vpd->len < 8) {
		rcu_read_unlock();
		return;
	}

	sdkp->lbpvpd	= 1;
	sdkp->lbpu	= (vpd->data[5] >> 7) & 1; /* UNMAP */
	sdkp->lbpws	= (vpd->data[5] >> 6) & 1; /* WRITE SAME(16) w/ UNMAP */
	sdkp->lbpws10	= (vpd->data[5] >> 5) & 1; /* WRITE SAME(10) w/ UNMAP */
	rcu_read_unlock();
}

/*
 * Probe WRITE SAME support via scsi_report_opcode() and record the
 * result in sdkp->ws16 / sdkp->ws10.  WRITE SAME is disabled outright
 * when the host sets no_write_same.
 */
static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer)
{
	struct scsi_device *sdev = sdkp->device;

	if (sdev->host->no_write_same) {
		sdev->no_write_same = 1;

		return;
	}

	if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY, 0) < 0) {
		struct scsi_vpd *vpd;

		sdev->no_report_opcodes = 1;

		/* Disable WRITE SAME if REPORT SUPPORTED OPERATION
		 * CODES is unsupported and the device has an ATA
		 * Information VPD page (SAT).
		 */
		rcu_read_lock();
		vpd = rcu_dereference(sdev->vpd_pg89);
		if (vpd)
			sdev->no_write_same = 1;
		rcu_read_unlock();
	}

	if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME_16, 0) == 1)
		sdkp->ws16 = 1;

	if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, WRITE_SAME, 0) == 1)
		sdkp->ws10 = 1;
}

/*
 * Set sdkp->security when the device reports both SECURITY PROTOCOL IN
 * and SECURITY PROTOCOL OUT as supported.  Skipped unless
 * sdev->security_supported is set.
 */
static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
{
	struct scsi_device *sdev = sdkp->device;

	if (!sdev->security_supported)
		return;

	if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE,
			SECURITY_PROTOCOL_IN, 0) == 1 &&
	    scsi_report_opcode(sdev, buffer, SD_BUF_SIZE,
			SECURITY_PROTOCOL_OUT, 0) == 1)
		sdkp->security = 1;
}

/* Read a big-endian 64-bit value from @buf and pass it to logical_to_sectors(). */
static inline sector_t sd64_to_sectors(struct scsi_disk *sdkp, u8 *buf)
{
	return logical_to_sectors(sdkp->device, get_unaligned_be64(buf));
}

/**
 * sd_read_cpr - Query concurrent positioning ranges
 * @sdkp:	disk to query
 *
 * Reads VPD page 0xb9 and hands the parsed ranges to
 * disk_set_independent_access_ranges().  On every path that reaches the
 * "out" label that call is still made, with NULL when no ranges were
 * allocated.
 */
static void sd_read_cpr(struct scsi_disk *sdkp)
{
	struct blk_independent_access_ranges *iars = NULL;
	unsigned char *buffer = NULL;
	unsigned int nr_cpr = 0;
	int i, vpd_len, buf_len = SD_BUF_SIZE;
	u8 *desc;

	/*
	 * We need to have the capacity set first for the block layer to be
	 * able to check the ranges.
	 */
	if (sdkp->first_scan)
		return;

	if (!sdkp->capacity)
		goto out;

	/*
	 * Concurrent Positioning Ranges VPD: there can be at most 256 ranges,
	 * leading to a maximum page size of 64 + 256*32 bytes.
	 */
	buf_len = 64 + 256*32;
	buffer = kmalloc(buf_len, GFP_KERNEL);
	if (!buffer || scsi_get_vpd_page(sdkp->device, 0xb9, buffer, buf_len))
		goto out;

	/* We must have at least a 64B header and one 32B range descriptor */
	vpd_len = get_unaligned_be16(&buffer[2]) + 4;
	if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) {
		sd_printk(KERN_ERR, sdkp,
			  "Invalid Concurrent Positioning Ranges VPD page\n");
		goto out;
	}

	/* A single range is treated the same as having no ranges at all. */
	nr_cpr = (vpd_len - 64) / 32;
	if (nr_cpr == 1) {
		nr_cpr = 0;
		goto out;
	}

	iars = disk_alloc_independent_access_ranges(sdkp->disk, nr_cpr);
	if (!iars) {
		nr_cpr = 0;
		goto out;
	}

	desc = &buffer[64];
	for (i = 0; i < nr_cpr; i++, desc += 32) {
		/* Byte 0 of each descriptor must equal its position in the page. */
		if (desc[0] != i) {
			sd_printk(KERN_ERR, sdkp,
				"Invalid Concurrent Positioning Range number\n");
			nr_cpr = 0;
			break;
		}

		iars->ia_range[i].sector = sd64_to_sectors(sdkp, desc + 8);
		iars->ia_range[i].nr_sectors = sd64_to_sectors(sdkp, desc + 16);
	}

out:
	disk_set_independent_access_ranges(sdkp->disk, iars);
	if (nr_cpr && sdkp->nr_actuators != nr_cpr) {
		sd_printk(KERN_NOTICE, sdkp,
			  "%u concurrent positioning ranges\n", nr_cpr);
		sdkp->nr_actuators = nr_cpr;
	}

	kfree(buffer);
}

/*
 * Return true when sdkp->min_xfer_blocks is non-zero and, converted to
 * bytes, is a multiple of the physical block size.  A misaligned value
 * is reset to 0 before returning false.
 */
static bool sd_validate_min_xfer_size(struct scsi_disk *sdkp)
{
	struct scsi_device *sdp = sdkp->device;
	unsigned int min_xfer_bytes =
		logical_to_bytes(sdp, sdkp->min_xfer_blocks);

	if (sdkp->min_xfer_blocks == 0)
		return false;

	/* The mask test assumes physical_block_size is a power of two. */
	if (min_xfer_bytes & (sdkp->physical_block_size - 1)) {
		sd_first_printk(KERN_WARNING, sdkp,
				"Preferred minimum I/O size %u bytes not a " \
				"multiple of physical block size (%u bytes)\n",
				min_xfer_bytes, sdkp->physical_block_size);
		sdkp->min_xfer_blocks = 0;
		return false;
	}

	sd_first_printk(KERN_INFO, sdkp, "Preferred minimum I/O size %u bytes\n",
			min_xfer_bytes);
	return true;
}

/*
 * Determine the device's preferred I/O size for reads and writes
 * unless the reported value is unreasonably small, large, not a
 * multiple of the physical block size, or simply garbage.
*/ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int dev_max) { struct scsi_device *sdp = sdkp->device; unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); unsigned int min_xfer_bytes = logical_to_bytes(sdp, sdkp->min_xfer_blocks); if (sdkp->opt_xfer_blocks == 0) return false; if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> dev_max (%u logical blocks)\n", sdkp->opt_xfer_blocks, dev_max); return false; } if (sdkp->opt_xfer_blocks > SD_DEF_XFER_BLOCKS) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ "> sd driver limit (%u logical blocks)\n", sdkp->opt_xfer_blocks, SD_DEF_XFER_BLOCKS); return false; } if (opt_xfer_bytes < PAGE_SIZE) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes < " \ "PAGE_SIZE (%u bytes)\n", opt_xfer_bytes, (unsigned int)PAGE_SIZE); return false; } if (min_xfer_bytes && opt_xfer_bytes % min_xfer_bytes) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of preferred minimum block " \ "size (%u bytes)\n", opt_xfer_bytes, min_xfer_bytes); return false; } if (opt_xfer_bytes & (sdkp->physical_block_size - 1)) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u bytes not a " \ "multiple of physical block size (%u bytes)\n", opt_xfer_bytes, sdkp->physical_block_size); return false; } sd_first_printk(KERN_INFO, sdkp, "Optimal transfer size %u bytes\n", opt_xfer_bytes); return true; } static void sd_read_block_zero(struct scsi_disk *sdkp) { struct scsi_device *sdev = sdkp->device; unsigned int buf_len = sdev->sector_size; u8 *buffer, cmd[16] = { }; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer) return; if (sdev->use_16_for_rw) { cmd[0] = READ_16; put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ } else { cmd[0] = READ_10; put_unaligned_be32(0, 
&cmd[2]); /* Logical block address 0 */ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ } scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, SD_TIMEOUT, sdkp->max_retries, NULL); kfree(buffer); } /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. * @disk: struct gendisk we care about **/ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; sector_t old_capacity = sdkp->capacity; struct queue_limits lim; unsigned char *buffer; unsigned int dev_max; int err; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); /* * If the device is offline, don't try and read capacity or any * of the other niceties. */ if (!scsi_device_online(sdp)) goto out; buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL); if (!buffer) { sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory " "allocation failure.\n"); goto out; } sd_spinup_disk(sdkp); lim = queue_limits_start_update(sdkp->disk->queue); /* * Without media there is no reason to ask; moreover, some devices * react badly if we do. */ if (sdkp->media_present) { sd_read_capacity(sdkp, &lim, buffer); /* * Some USB/UAS devices return generic values for mode pages * until the media has been accessed. Trigger a READ operation * to force the device to populate mode pages. */ if (sdp->read_before_ms) sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will * cause this to be updated correctly and any device which * doesn't support it should be treated as rotational. 
*/ lim.features |= (BLK_FEAT_ROTATIONAL | BLK_FEAT_ADD_RANDOM); if (scsi_device_supports_vpd(sdp)) { sd_read_block_provisioning(sdkp); sd_read_block_limits(sdkp, &lim); sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); } sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); sd_read_cache_type(sdkp, buffer); sd_read_io_hints(sdkp, buffer); sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); sd_config_protection(sdkp, &lim); } /* * We now have all cache related info, determine how we deal * with flush requests. */ sd_set_flush_flag(sdkp, &lim); /* Initial block count limit based on CDB TRANSFER LENGTH field size. */ dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS; /* Some devices report a maximum block count for READ/WRITE requests. */ dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); lim.max_dev_sectors = logical_to_sectors(sdp, dev_max); if (sd_validate_min_xfer_size(sdkp)) lim.io_min = logical_to_bytes(sdp, sdkp->min_xfer_blocks); else lim.io_min = 0; /* * Limit default to SCSI host optimal sector limit if set. There may be * an impact on performance for when the size of a request exceeds this * host limit. */ lim.io_opt = sdp->host->opt_sectors << SECTOR_SHIFT; if (sd_validate_opt_xfer_size(sdkp, dev_max)) { lim.io_opt = min_not_zero(lim.io_opt, logical_to_bytes(sdp, sdkp->opt_xfer_blocks)); } sdkp->first_scan = 0; set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp, &lim); kfree(buffer); err = queue_limits_commit_update_frozen(sdkp->disk->queue, &lim); if (err) return err; /* * Query concurrent positioning ranges after * queue_limits_commit_update() unlocked q->limits_lock to avoid * deadlock with q->sysfs_dir_lock and q->sysfs_lock. 
*/ if (sdkp->media_present && scsi_device_supports_vpd(sdp)) sd_read_cpr(sdkp); /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk * capacity to 0. */ if (sd_zbc_revalidate_zones(sdkp)) set_capacity_and_notify(disk, 0); out: return 0; } /** * sd_unlock_native_capacity - unlock native capacity * @disk: struct gendisk to set capacity for * * Block layer calls this function if it detects that partitions * on @disk reach beyond the end of the device. If the SCSI host * implements ->unlock_native_capacity() method, it's invoked to * give it a chance to adjust the device capacity. * * CONTEXT: * Defined by block layer. Might sleep. */ static void sd_unlock_native_capacity(struct gendisk *disk) { struct scsi_device *sdev = scsi_disk(disk)->device; if (sdev->host->hostt->unlock_native_capacity) sdev->host->hostt->unlock_native_capacity(sdev); } /** * sd_format_disk_name - format disk name * @prefix: name prefix - ie. "sd" for SCSI disks * @index: index of the disk to format name for * @buf: output buffer * @buflen: length of the output buffer * * SCSI disk names starts at sda. The 26th device is sdz and the * 27th is sdaa. The last one for two lettered suffix is sdzz * which is followed by sdaaa. * * This is basically 26 base counting with one extra 'nil' entry * at the beginning from the second digit on and can be * determined using similar method as 26 base conversion with the * index shifted -1 after each digit is computed. * * CONTEXT: * Don't care. * * RETURNS: * 0 on success, -errno on failure. 
/**
 *	sd_format_disk_name - format disk name
 *	@prefix: name prefix - ie. "sd" for SCSI disks
 *	@index: index of the disk to format name for (must not be negative)
 *	@buf: output buffer
 *	@buflen: length of the output buffer in bytes
 *
 *	SCSI disk names start at sda.  The 26th device is sdz and the
 *	27th is sdaa.  The last one for two lettered suffix is sdzz
 *	which is followed by sdaaa.
 *
 *	This is basically 26 base counting with one extra 'nil' entry
 *	at the beginning from the second digit on and can be
 *	determined using similar method as 26 base conversion with the
 *	index shifted -1 after each digit is computed.
 *
 *	The suffix is built backwards from the end of @buf and then moved
 *	down so that it directly follows @prefix.
 *
 *	CONTEXT:
 *	Don't care.
 *
 *	RETURNS:
 *	0 on success.  -EINVAL if @index is negative or if @buf is too
 *	small to hold @prefix, the suffix and the terminating NUL.  When
 *	@buflen does not exceed the length of @prefix, or @index is
 *	negative, @buf is not written at all.
 */
static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	const size_t prefix_len = strlen(prefix);
	char *begin;
	char *end;
	char *p;

	/*
	 * Without this check "end - 1" would already lie below "begin", the
	 * "p == begin" test in the loop would never fire and the writes
	 * would land outside of buf[0..buflen).  A negative index has no
	 * name and would otherwise yield a character below 'a'.
	 */
	if (index < 0 || buflen <= 0 || (size_t)buflen <= prefix_len)
		return -EINVAL;

	begin = buf + prefix_len;
	end = buf + buflen;

	p = end - 1;
	*p = '\0';
	do {
		/* No room left between the prefix and the digits so far. */
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % base);
		index = (index / base) - 1;
	} while (index >= 0);

	/* Slide the suffix (including its NUL) down behind the prefix. */
	memmove(begin, p, end - p);
	memcpy(buf, prefix, prefix_len);

	return 0;
}
**/ static int sd_probe(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); struct scsi_disk *sdkp; struct gendisk *gd; int index; int error; scsi_autopm_get_device(sdp); error = -ENODEV; if (sdp->type != TYPE_DISK && sdp->type != TYPE_ZBC && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC) goto out; if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) { sdev_printk(KERN_WARNING, sdp, "Unsupported ZBC host-managed device.\n"); goto out; } SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp, "sd_probe\n")); error = -ENOMEM; sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL); if (!sdkp) goto out; gd = blk_mq_alloc_disk_for_queue(sdp->request_queue, &sd_bio_compl_lkclass); if (!gd) goto out_free; index = ida_alloc(&sd_index_ida, GFP_KERNEL); if (index < 0) { sdev_printk(KERN_WARNING, sdp, "sd_probe: memory exhausted.\n"); goto out_put; } error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); if (error) { sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); goto out_free_index; } sdkp->device = sdp; sdkp->disk = gd; sdkp->index = index; sdkp->max_retries = SD_MAX_RETRIES; atomic_set(&sdkp->openers, 0); atomic_set(&sdkp->device->ioerr_cnt, 0); if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else blk_queue_rq_timeout(sdp->request_queue, SD_MOD_TIMEOUT); } device_initialize(&sdkp->disk_dev); sdkp->disk_dev.parent = get_device(dev); sdkp->disk_dev.class = &sd_disk_class; dev_set_name(&sdkp->disk_dev, "%s", dev_name(dev)); error = device_add(&sdkp->disk_dev); if (error) { put_device(&sdkp->disk_dev); goto out; } dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); gd->minors = SD_MINORS; gd->fops = &sd_fops; gd->private_data = sdkp; /* defaults, until the device tells us otherwise */ sdp->sector_size = 512; sdkp->capacity = 0; sdkp->media_present = 1; sdkp->write_prot = 0; 
sdkp->cache_override = 0; sdkp->WCE = 0; sdkp->RCD = 0; sdkp->ATO = 0; sdkp->first_scan = 1; sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS; sd_revalidate_disk(gd); if (sdp->removable) { gd->flags |= GENHD_FL_REMOVABLE; gd->events |= DISK_EVENT_MEDIA_CHANGE; gd->event_flags = DISK_EVENT_FLAG_POLL | DISK_EVENT_FLAG_UEVENT; } blk_pm_runtime_init(sdp->request_queue, dev); if (sdp->rpm_autosuspend) { pm_runtime_set_autosuspend_delay(dev, sdp->host->rpm_autosuspend_delay); } error = device_add_disk(dev, gd, NULL); if (error) { device_unregister(&sdkp->disk_dev); put_disk(gd); goto out; } if (sdkp->security) { sdkp->opal_dev = init_opal_dev(sdkp, &sd_sec_submit); if (sdkp->opal_dev) sd_printk(KERN_NOTICE, sdkp, "supports TCG Opal\n"); } sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? "removable " : ""); scsi_autopm_put_device(sdp); return 0; out_free_index: ida_free(&sd_index_ida, index); out_put: put_disk(gd); out_free: kfree(sdkp); out: scsi_autopm_put_device(sdp); return error; } /** * sd_remove - called whenever a scsi disk (previously recognized by * sd_probe) is detached from the system. It is called (potentially * multiple times) during sd module unload. * @dev: pointer to device object * * Note: this function is invoked from the scsi mid-level. * This function potentially frees up a device name (e.g. /dev/sdc) * that could be re-used by a subsequent sd_probe(). * This function is not called when the built-in sd driver is "exit-ed". 
**/ static int sd_remove(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); scsi_autopm_get_device(sdkp->device); device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); if (!sdkp->suspended) sd_shutdown(dev); put_disk(sdkp->disk); return 0; } static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); ida_free(&sd_index_ida, sdkp->index); put_device(&sdkp->device->sdev_gendev); free_opal_dev(sdkp->opal_dev); kfree(sdkp); } static int sd_start_stop_device(struct scsi_disk *sdkp, int start) { unsigned char cmd[6] = { START_STOP }; /* START_VALID */ struct scsi_sense_hdr sshdr; struct scsi_failure failure_defs[] = { { /* Power on, reset, or bus device reset occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 0, .result = SAM_STAT_CHECK_CONDITION, }, { /* Power on occurred */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 1, .result = SAM_STAT_CHECK_CONDITION, }, { /* SCSI bus reset */ .sense = UNIT_ATTENTION, .asc = 0x29, .ascq = 2, .result = SAM_STAT_CHECK_CONDITION, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .sshdr = &sshdr, .req_flags = BLK_MQ_REQ_PM, .failures = &failures, }; struct scsi_device *sdp = sdkp->device; int res; if (start) cmd[4] |= 1; /* START */ if (sdp->start_stop_pwr_cond) cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */ if (!scsi_device_online(sdp)) return -ENODEV; res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, SD_TIMEOUT, sdkp->max_retries, &exec_args); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (res > 0 && scsi_sense_valid(&sshdr)) { sd_print_sense_hdr(sdkp, &sshdr); /* 0x3a is medium not present */ if (sshdr.asc == 0x3a) res = 0; } } /* SCSI error codes must not go to the generic layer */ if (res) return -EIO; return 0; } /* * Send a SYNCHRONIZE CACHE instruction down to the device through * the normal SCSI command structure. 
Wait for the command to * complete. */ static void sd_shutdown(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); if (!sdkp) return; /* this can happen */ if (pm_runtime_suspended(dev)) return; if (sdkp->WCE && sdkp->media_present) { sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); sd_sync_cache(sdkp); } if ((system_state != SYSTEM_RESTART && sdkp->device->manage_system_start_stop) || (system_state == SYSTEM_POWER_OFF && sdkp->device->manage_shutdown)) { sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); sd_start_stop_device(sdkp, 0); } } static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime) { return (sdev->manage_system_start_stop && !runtime) || (sdev->manage_runtime_start_stop && runtime); } static int sd_suspend_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret = 0; if (!sdkp) /* E.g.: runtime suspend following sd_remove() */ return 0; if (sdkp->WCE && sdkp->media_present) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp); /* ignore OFFLINE device */ if (ret == -ENODEV) return 0; if (ret) return ret; } if (sd_do_start_stop(sdkp->device, runtime)) { if (!sdkp->device->silence_suspend) sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (!runtime) ret = 0; } if (!ret) sdkp->suspended = true; return ret; } static int sd_suspend_system(struct device *dev) { if (pm_runtime_suspended(dev)) return 0; return sd_suspend_common(dev, false); } static int sd_suspend_runtime(struct device *dev) { return sd_suspend_common(dev, true); } static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; } return 0; } static int 
sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); sdkp->suspended = false; } return ret; } static int sd_resume_system(struct device *dev) { if (pm_runtime_suspended(dev)) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp = sdkp ? sdkp->device : NULL; if (sdp && sdp->force_runtime_start_on_system_start) pm_request_resume(dev); return 0; } return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; sdp = sdkp->device; if (sdp->ignore_media_change) { /* clear the device's sense data */ static const u8 cmd[10] = { REQUEST_SENSE }; const struct scsi_exec_args exec_args = { .req_flags = BLK_MQ_REQ_PM, }; if (scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, sdp->request_queue->rq_timeout, 1, &exec_args)) sd_printk(KERN_NOTICE, sdkp, "Failed to clear sense data\n"); } return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { .suspend = sd_suspend_system, .resume = sd_resume_system, .poweroff = sd_suspend_system, .restore = sd_resume_system, .runtime_suspend = sd_suspend_runtime, .runtime_resume = sd_resume_runtime, }; static struct scsi_driver sd_template = { .gendrv = { .name = "sd", .probe = sd_probe, .probe_type = PROBE_PREFER_ASYNCHRONOUS, .remove = sd_remove, .shutdown = sd_shutdown, .pm = &sd_pm_ops, }, .rescan = sd_rescan, .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, .eh_action = sd_eh_action, .eh_reset = sd_eh_reset, }; /** * init_sd - entry point for 
this driver (both when built in or when * a module). * * Note: this function registers this driver with the scsi mid-level. **/ static int __init init_sd(void) { int majors = 0, i, err; SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); for (i = 0; i < SD_MAJORS; i++) { if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) continue; majors++; } if (!majors) return -ENODEV; err = class_register(&sd_disk_class); if (err) goto err_out; sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0); if (!sd_page_pool) { printk(KERN_ERR "sd: can't init discard page pool\n"); err = -ENOMEM; goto err_out_class; } err = scsi_register_driver(&sd_template.gendrv); if (err) goto err_out_driver; return 0; err_out_driver: mempool_destroy(sd_page_pool); err_out_class: class_unregister(&sd_disk_class); err_out: for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); return err; } /** * exit_sd - exit point for this driver (when it is a module). * * Note: this function unregisters this driver from the scsi mid-level. **/ static void __exit exit_sd(void) { int i; SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n")); scsi_unregister_driver(&sd_template.gendrv); mempool_destroy(sd_page_pool); class_unregister(&sd_disk_class); for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); } module_init(init_sd); module_exit(exit_sd); void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { scsi_print_sense_hdr(sdkp->device, sdkp->disk ? sdkp->disk->disk_name : NULL, sshdr); } void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result) { const char *hb_string = scsi_hostbyte_string(result); if (hb_string) sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=%s driverbyte=%s\n", msg, hb_string ? hb_string : "invalid", "DRIVER_OK"); else sd_printk(KERN_INFO, sdkp, "%s: Result: hostbyte=0x%02x driverbyte=%s\n", msg, host_byte(result), "DRIVER_OK"); }
4 4 4 4 4 4 4 4 96 77 77 68 9 77 13 10 3 13 13 12 6 73 14 72 4 31 2 33 13 86 86 86 5 4 12 41 37 6 6 6 66 40 50 35 2 21 48 12 14 8 3 11 3 3 3 3 3 2 2 39 39 117 158 21 69 27 16 1 15 1 1 42 42 18 18 24 42 42 41 36 5 3 2 38 39 3 42 1 1 1 1 1 69 3 3 63 62 62 61 57 57 56 1 58 62 2 63 1 60 1 1 1 60 61 62 70 70 5 63 1 65 90 11 79 6 85 90 67 18 20 6 6 2 2 1 39 57 49 1 1 1 1 46 37 7 1 2 6 39 17 1 3 1 1 1 13 8 3 11 14 3 342 24 24 5 93 14 72 85 84 85 85 77 8 52 44 8 48 1 41 6 24 23 15 32 45 2 45 2 45 2 45 2 40 7 39 8 45 2 48 48 44 4 8 39 32 8 4 1 1 2 2 1 1 136 212 1 4 2 3 1 1 3 2 1 2 201 12 334 12 12 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 
878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 
1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 
1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 
2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 tunneling device * Linux INET6 implementation * * Authors: * Ville Nuorvala <vnuorval@tcs.hut.fi> * Yasuyuki Kozakai <kozakai@linux-ipv6.org> * * Based on: * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c * * RFC 2473 */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/sockios.h> #include <linux/icmp.h> #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include 
<linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/etherdevice.h>

#include <linux/uaccess.h>
#include <linux/atomic.h>

#include <net/icmp.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/netdev_lock.h>
#include <net/dst_metadata.h>
#include <net/inet_dscp.h>

MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");

/* The tunnel hash table has 1 << IP6_TUNNEL_HASH_SIZE_SHIFT buckets. */
#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)

/* Module parameter, passed to __ip6_tnl_rcv() by ipxip6_rcv(). */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/*
 * Hash a pair of IPv6 addresses: XOR the two per-address hashes and
 * reduce the result to IP6_TUNNEL_HASH_SIZE_SHIFT bits.  The result is
 * used as an index into tnls_r_l[].
 */
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);

	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
}

static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;

/* Key used with net_generic() to fetch the per-netns struct ip6_tnl_net. */
static unsigned int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	/* buckets indexed by HASH() of the tunnel end-points */
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
	/* single slot, used as last resort by ip6_tnl_lookup() */
	struct ip6_tnl __rcu *tnls_wc[1];
	/*
	 * Indexed by priority in ip6_tnl_bucket(): 0 when both end-points
	 * are unspecified, 1 otherwise.  Presumably set up to point at
	 * tnls_wc and tnls_r_l respectively - TODO confirm in the init code.
	 */
	struct ip6_tnl __rcu **tnls[2];
	/* tunnel with parms.collect_md set, see ip6_tnl_link() */
	struct ip6_tnl __rcu *collect_md_tun;
};

/* Returns non-zero when the kernel is built with CONFIG_MPLS. */
static inline int ip6_tnl_mpls_supported(void)
{
	return IS_ENABLED(CONFIG_MPLS);
}

/*
 * Walk an RCU-protected tunnel list starting at @start.  The iterator is
 * a variable named 't' that must be declared by the caller.
 */
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/**
 *
ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @net: network namespace * @link: ifindex of underlying interface * @remote: the address of the tunnel exit-point * @local: the address of the tunnel entry-point * * Return: * tunnel matching given end-points if found, * else fallback tunnel if its device is up, * else %NULL **/ static struct ip6_tnl * ip6_tnl_lookup(struct net *net, int link, const struct in6_addr *remote, const struct in6_addr *local) { unsigned int hash = HASH(remote, local); struct ip6_tnl *t, *cand = NULL; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else cand = t; } memset(&any, 0, sizeof(any)); hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_any(&t->parms.raddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else if (!cand) cand = t; } hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || !ipv6_addr_any(&t->parms.laddr) || !(t->dev->flags & IFF_UP)) continue; if (link == t->parms.link) return t; else if (!cand) cand = t; } if (cand) return cand; t = rcu_dereference(ip6n->collect_md_tun); if (t && t->dev->flags & IFF_UP) return t; t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } /** * ip6_tnl_bucket - get head of list matching given tunnel parameters * @ip6n: the private data for ip6_vti in the netns * @p: parameters containing tunnel end-points * * Description: * ip6_tnl_bucket() returns the head of the list matching the * &struct in6_addr entries laddr and raddr in @p. 
* * Return: head of IPv6 tunnel list **/ static struct ip6_tnl __rcu ** ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; unsigned int h = 0; int prio = 0; if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } /** * ip6_tnl_link - add tunnel to hash table * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be added **/ static void ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); if (t->parms.collect_md) rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } /** * ip6_tnl_unlink - remove tunnel from hash table * @ip6n: the private data for ip6_vti in the netns * @t: tunnel to be removed **/ static void ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; if (t->parms.collect_md) rcu_assign_pointer(ip6n->collect_md_tun, NULL); for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { if (t == iter) { rcu_assign_pointer(*tp, t->next); break; } } } static void ip6_dev_free(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); } static int ip6_tnl_create2(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ip6_tnl_net *ip6n = net_generic(t->net, ip6_tnl_net_id); int err; dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); ip6_tnl_link(ip6n, t); return 0; out: return err; } /** * ip6_tnl_create - create a new tunnel * @net: network namespace * @p: tunnel parameters * * Description: * Create tunnel matching given parameters. 
*
 * Return:
 *   created tunnel or error pointer
 **/

static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err;

	if (p->name[0]) {
		/* An invalid user-supplied name is reported as -E2BIG. */
		if (!dev_valid_name(p->name))
			return ERR_PTR(-E2BIG);
		strscpy(name, p->name, IFNAMSIZ);
	} else {
		/* Name template; the "%d" is passed on to alloc_netdev(). */
		strscpy(name, "ip6tnl%d", IFNAMSIZ);
	}

	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
			   ip6_tnl_dev_setup);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev_net_set(dev, net);

	t = netdev_priv(dev);
	t->parms = *p;
	t->net = dev_net(dev);

	err = ip6_tnl_create2(dev);
	if (err < 0) {
		/* Registration failed: the device is ours to free. */
		free_netdev(dev);
		return ERR_PTR(err);
	}

	return t;
}

/**
 * ip6_tnl_locate - find or create tunnel matching given parameters
 *   @net: network namespace
 *   @p: tunnel parameters
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
 *   ip6_tnl_locate() first tries to locate an existing tunnel
 *   based on @p. If this is unsuccessful, but @create is set, a new
 *   tunnel device is created and registered for use.
* * Return: * matching tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; struct ip6_tnl __rcu **tp; struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); for (tp = ip6_tnl_bucket(ip6n, p); (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && p->link == t->parms.link) { if (create) return ERR_PTR(-EEXIST); return t; } } if (!create) return ERR_PTR(-ENODEV); return ip6_tnl_create(net, p); } /** * ip6_tnl_dev_uninit - tunnel device uninitializer * @dev: the device to be destroyed * * Description: * ip6_tnl_dev_uninit() removes tunnel from its list **/ static void ip6_tnl_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else ip6_tnl_unlink(ip6n, t); dst_cache_reset(&t->dst_cache); netdev_put(dev, &t->dev_tracker); } /** * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option * @skb: received socket buffer * @raw: the ICMPv6 error message data * * Return: * 0 if none was found, * else index to encapsulation limit **/ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw; unsigned int nhoff = raw - skb->data; unsigned int off = nhoff + sizeof(*ipv6h); u8 nexthdr = ipv6h->nexthdr; while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) { struct ipv6_opt_hdr *hdr; u16 optlen; if (!pskb_may_pull(skb, off + sizeof(*hdr))) break; hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { optlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { optlen = ipv6_authlen(hdr); } else { optlen = ipv6_optlen(hdr); } if 
(!pskb_may_pull(skb, off + optlen)) break; hdr = (struct ipv6_opt_hdr *)(skb->data + off); if (nexthdr == NEXTHDR_FRAGMENT) { struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr; if (frag_hdr->frag_off) break; } if (nexthdr == NEXTHDR_DEST) { u16 i = 2; while (1) { struct ipv6_tlv_tnl_enc_lim *tel; /* No more room for encapsulation limit */ if (i + sizeof(*tel) > optlen) break; tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i); /* return index of option if found and valid */ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT && tel->length == 1) return i + off - nhoff; /* else jump to next option */ if (tel->type) i += tel->length + 2; else i++; } } nexthdr = hdr->nexthdr; off += optlen; } return 0; } EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /* ip6_tnl_err() should handle errors in the tunnel according to the * specifications in RFC 2473. */ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; struct net *net = dev_net(skb->dev); u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; __u32 rel_info = 0; struct ip6_tnl *t; int err = -ENOENT; int rel_msg = 0; u8 tproto; __u16 len; /* If the packet doesn't contain the original IPv6 header we are in trouble since we might need the source address for further processing of the error. 
*/ rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->daddr, &ipv6h->saddr); if (!t) goto out; tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto out; err = 0; switch (*type) { case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); rel_msg = 1; break; case ICMPV6_TIME_EXCEED: if ((*code) == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); rel_msg = 1; } break; case ICMPV6_PARAMPROB: { struct ipv6_tlv_tnl_enc_lim *tel; __u32 teli; teli = 0; if ((*code) == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", t->parms.name); rel_msg = 1; } } else { net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } break; } case ICMPV6_PKT_TOOBIG: { __u32 mtu; ip6_update_pmtu(skb, net, htonl(*info), 0, 0, sock_net_uid(net, NULL)); mtu = *info - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; rel_msg = 1; } break; } case NDISC_REDIRECT: ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); break; } *type = rel_type; *code = rel_code; *info = rel_info; *msg = rel_msg; out: rcu_read_unlock(); return err; } static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); const struct iphdr *eiph; struct sk_buff *skb2; int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; struct rtable *rt; struct flowi4 fl4; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); if (err < 0) return err; 
if (rel_msg == 0) return 0; switch (rel_type) { case ICMPV6_DEST_UNREACH: if (rel_code != ICMPV6_ADDR_UNREACH) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_HOST_UNREACH; break; case ICMPV6_PKT_TOOBIG: if (rel_code != 0) return 0; rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; default: return 0; } if (!pskb_may_pull(skb, offset + sizeof(struct iphdr))) return 0; skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) return 0; skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); eiph = ip_hdr(skb2); /* Try to guess incoming interface */ rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, 0, 0, 0, IPPROTO_IPIP, eiph->tos & INET_DSCP_MASK, 0); if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; ip_rt_put(rt); /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->daddr, eiph->saddr, 0, 0, IPPROTO_IPIP, eiph->tos & INET_DSCP_MASK, 0); if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; } skb_dst_set(skb2, &rt->dst); } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, ip4h_dscp(eiph), skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } /* change mtu on this route */ if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) { if (rel_info > dst_mtu(skb_dst(skb2))) goto out; skb_dst_update_pmtu_no_confirm(skb2, rel_info); } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); out: kfree_skb(skb2); return 0; } static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); if (err < 0) return err; if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) { struct rt6_info *rt; struct sk_buff *skb2 = skb_clone(skb, 
GFP_ATOMIC); if (!skb2) return 0; skb_dst_drop(skb2); skb_pull(skb2, offset); skb_reset_network_header(skb2); /* Try to guess incoming interface */ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, skb2, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; icmpv6_send(skb2, rel_type, rel_code, rel_info); ip6_rt_put(rt); kfree_skb(skb2); } return 0; } static int mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __u32 rel_info = ntohl(info); int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); return err; } static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); return IP6_ECN_decapsulate(ipv6h, skb); } static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); return IP6_ECN_decapsulate(ipv6h, skb); } static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) { /* ECN is not supported in AF_MPLS */ return 0; } __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { flags = IP6_TNL_F_CAP_PER_PACKET; } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { if (ltype&IPV6_ADDR_UNICAST) 
flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
EXPORT_SYMBOL(ip6_tnl_get_cap);

/*
 * ip6_tnl_rcv_ctl - decide whether the tunnel may receive for this address pair
 *
 * Returns 1 when the tunnel has receive capability (statically, or per packet
 * via ip6_tnl_get_cap()), the local address is multicast or passes
 * ipv6_chk_addr_and_flags(), and the remote address either fails that check
 * or IP6_TNL_F_ALLOW_LOCAL_REMOTE is set.  Returns 0 otherwise.
 *
 * called with rcu_read_lock()
 */
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
				  const struct in6_addr *laddr,
				  const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
		struct net_device *ldev = NULL;

		/* Restrict the address checks to the bound link, if any. */
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
						    0, IFA_F_TENTATIVE))) &&
		    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
		     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
						     0, IFA_F_TENTATIVE))))
			ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);

/*
 * __ip6_tnl_rcv - common receive path after the outer header has been pulled
 * @tunnel: receiving tunnel
 * @skb: packet; always handed off or freed by this function
 * @tpi: parsed tunnel info (flags, sequence number, payload protocol)
 * @tun_dst: optional metadata dst; attached to the skb on success,
 *	released on drop
 * @dscp_ecn_decapsulate: per-payload DSCP/ECN callback
 * @log_ecn_err: emit a rate-limited message when the callback reports an error
 *
 * Checks checksum-flag and sequence-number consistency, resets the headers to
 * the inner packet, runs DSCP/ECN decapsulation, updates rx statistics and
 * passes the packet to gro_cells_receive().
 *
 * Return: always 0, on both the delivery and the drop path.
 */
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						const struct ipv6hdr *ipv6h,
						struct sk_buff *skb),
			 bool log_ecn_err)
{
	const struct ipv6hdr *ipv6h;
	int nh, err;

	/* The checksum flag of the packet must match the tunnel's setting. */
	if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
	    test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	/* With sequencing enabled, drop packets that carry no sequence
	 * number or one that is behind the next expected value.
	 */
	if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
		if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb->protocol = tpi->proto;

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			DEV_STATS_INC(tunnel->dev, rx_length_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}

		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
		skb_reset_mac_header(skb);
	}

	/* Save offset of outer header relative to skb->head,
	 * because we are going to reset the network header to the inner header
	 * and might change skb->head.
	 */
	nh = skb_network_header(skb) - skb->head;

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
		DEV_STATS_INC(tunnel->dev, rx_length_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	/* Get the outer header. */
	ipv6h = (struct ipv6hdr *)(skb->head + nh);

	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);

	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		/* Only results above 1 cause the packet to be dropped. */
		if (err > 1) {
			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}

/*
 * ip6_tnl_rcv - exported receive entry point
 *
 * Selects the IPv4 DSCP/ECN helper when tpi->proto is ETH_P_IP and the IPv6
 * helper otherwise, then defers to __ip6_tnl_rcv().
 */
int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
		const struct tnl_ptk_info *tpi,
		struct metadata_dst *tun_dst,
		bool log_ecn_err)
{
	int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
				    const struct ipv6hdr *ipv6h,
				    struct sk_buff *skb);

	dscp_ecn_decapsulate = ip6ip6_dscp_ecn_decapsulate;
	if (tpi->proto == htons(ETH_P_IP))
		dscp_ecn_decapsulate = ip4ip6_dscp_ecn_decapsulate;

	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
			     log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);

/* Fixed tunnel-info templates: only the payload protocol is set. */
static const struct
tnl_ptk_info tpi_v6 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IP),
};

static const struct tnl_ptk_info tpi_mpls = {
	/* no tunnel info required for mplsip6. */
	.proto = htons(ETH_P_MPLS_UC),
};

/*
 * ipxip6_rcv - shared receive handler for the IPv4/IPv6/MPLS-in-IPv6 handlers
 * @skb: received packet
 * @ipproto: inner protocol the calling handler is registered for
 * @tpi: fixed tunnel info describing the payload protocol
 * @dscp_ecn_decapsulate: DSCP/ECN helper matching the payload type
 *
 * Looks up the tunnel under rcu_read_lock(), validates protocol, xfrm policy
 * and addresses, pulls the tunnel header and hands the packet to
 * __ip6_tnl_rcv().
 *
 * Return: -1 when no tunnel matches (the skb is left untouched); 0 when the
 * packet was dropped here; otherwise the result of __ip6_tnl_rcv().
 */
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct metadata_dst *tun_dst = NULL;
	int ret = -1;

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), skb->dev->ifindex, &ipv6h->saddr, &ipv6h->daddr);

	if (t) {
		u8 tproto = READ_ONCE(t->parms.proto);

		/* A tunnel proto of 0 accepts any inner protocol. */
		if (tproto != ipproto && tproto != 0)
			goto drop;
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;
		/* Re-read the header pointer after the policy check. */
		ipv6h = ipv6_hdr(skb);
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
			goto drop;
		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
			goto drop;
		if (t->parms.collect_md) {
			IP_TUNNEL_DECLARE_FLAGS(flags) = { };

			tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0);
			if (!tun_dst)
				goto drop;
		}
		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst,
				    dscp_ecn_decapsulate,
				    log_ecn_error);
	}

	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}

/* Receive handler for IPv4-in-IPv6. */
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
			  ip4ip6_dscp_ecn_decapsulate);
}

/* Receive handler for IPv6-in-IPv6. */
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
}

/* Receive handler for MPLS-in-IPv6. */
static int mplsip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls,
			  mplsip6_dscp_ecn_decapsulate);
}

/*
 * Tx options together with the 8-byte buffer that holds the destination
 * options header filled in by init_tel_txopt().
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};

/*
 * init_tel_txopt - build the tunnel encapsulation limit option
 *
 * Zeroes @opt, then writes an encapsulation-limit TLV followed by a PadN TLV
 * into dst_opt[] and points ops.dst1opt at that buffer.
 */
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3]
= 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}

/**
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 *   @t: the outgoing tunnel device
 *   @hdr: IPv6 header from the incoming packet
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 *   doesn't match source of incoming packet.
 *
 * Return:
 *   true if the packet source equals the tunnel's remote address,
 *   false otherwise
 **/

static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

/*
 * ip6_tnl_xmit_ctl - decide whether the tunnel may transmit for this pair
 *
 * Always returns 1 for collect_md tunnels.  Otherwise returns 1 only when the
 * tunnel has transmit capability, the local address passes
 * ipv6_chk_addr_and_flags(), and the remote address is not found on this node
 * (that check is skipped for multicast remotes or when
 * IP6_TNL_F_ALLOW_LOCAL_REMOTE is set).  A rate-limited warning is logged for
 * each refusal caused by an address check.  Takes rcu_read_lock() itself.
 */
int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
		     const struct in6_addr *laddr,
		     const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if (t->parms.collect_md)
		return 1;

	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
		struct net_device *ldev = NULL;

		rcu_read_lock();
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
						      0, IFA_F_TENTATIVE)))
			pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
					    p->name);
		else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
			 !ipv6_addr_is_multicast(raddr) &&
			 unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
							  true, 0, IFA_F_TENTATIVE)))
			pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
					    p->name);
		else
			ret = 1;
		rcu_read_unlock();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);

/**
 * ip6_tnl_xmit - encapsulate packet and send
 *   @skb: the outgoing socket buffer
 *   @dev: the outgoing tunnel device
 *   @dsfield: dscp code for outer header
 *   @fl6: flow of tunneled packet
 *   @encap_limit: encapsulation limit; a negative value means no
 *	encapsulation limit option is added
 *   @pmtu: Path MTU is stored if packet is too big
 *   @proto: next header value
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 * Return:
 *   0 on success
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
 *   Other negative values can also be returned: the error from
 *   xfrm_lookup(), or a non-zero result of ip6_tnl_encap().
 **/

int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
		 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
		 __u8 proto)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ipv6hdr *ipv6h;
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ?
ETH_HLEN : 0;
	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
	unsigned int max_headroom = psh_hlen;
	__be16 payload_protocol;
	bool use_cache = false;
	u8 hop_limit;
	int err = -1;

	payload_protocol = skb_protocol(skb, true);

	/* collect_md tunnels take the hop limit from the per-packet tunnel
	 * info and jump straight to the route lookup, skipping the NBMA
	 * handling, the dst cache and ip6_tnl_xmit_ctl().
	 */
	if (t->parms.collect_md) {
		hop_limit = skb_tunnel_info(skb)->key.ttl;
		goto route_lookup;
	} else {
		hop_limit = t->parms.hop_limit;
	}

	/* NBMA tunnel */
	if (ipv6_addr_any(&t->parms.raddr)) {
		if (payload_protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			struct neighbour *neigh;
			int addr_type;

			if (!skb_dst(skb))
				goto tx_err_link_failure;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_err_link_failure;

			/* Use the neighbour key as outer destination, falling
			 * back to the inner destination when the key is the
			 * unspecified address.
			 */
			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY)
				addr6 = &ipv6_hdr(skb)->daddr;

			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
			neigh_release(neigh);
		} else if (payload_protocol == htons(ETH_P_IP)) {
			const struct rtable *rt = skb_rtable(skb);

			if (!rt)
				goto tx_err_link_failure;

			/* An IPv6 gateway on the IPv4 route becomes the
			 * outer destination.
			 */
			if (rt->rt_gw_family == AF_INET6)
				memcpy(&fl6->daddr, &rt->rt_gw6, sizeof(fl6->daddr));
		}
	} else if (t->parms.proto != 0 && !(t->parms.flags &
					    (IP6_TNL_F_USE_ORIG_TCLASS |
					     IP6_TNL_F_USE_ORIG_FWMARK))) {
		/* enable the cache only if neither the outer protocol nor the
		 * routing decision depends on the current inner header value
		 */
		use_cache = true;
	}

	if (use_cache)
		dst = dst_cache_get(&t->dst_cache);

	if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

	if (!dst) {
route_lookup:
		/* add dsfield to flowlabel for route lookup */
		fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);

		dst = ip6_route_output(net, NULL, fl6);

		if (dst->error)
			goto tx_err_link_failure;
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			/* Clear dst so the error path does not release an
			 * error pointer.
			 */
			err = PTR_ERR(dst);
			dst = NULL;
			goto tx_err_link_failure;
		}
		/* collect_md without an explicit source: pick a source
		 * address for the chosen route; a non-zero result is a
		 * link failure.
		 */
		if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) &&
		    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
				       &fl6->daddr, 0, &fl6->saddr))
			goto
tx_err_link_failure;
		/* Remember that this dst came from a fresh lookup. */
		ndst = dst;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		DEV_STATS_INC(dev, collisions);
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
	/* The encapsulation limit option occupies 8 bytes. */
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
		       IPV6_MIN_MTU : IPV4_MIN_MTU);

	skb_dst_update_pmtu_no_confirm(skb, mtu);
	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/* While errors are pending and recent, signal a link failure for
	 * this packet and consume one pending error; stale errors are
	 * cleared.
	 */
	if (t->err_count > 0) {
		if (time_before(jiffies,
				t->err_time + IP6TUNNEL_ERR_TIMEO)) {
			t->err_count--;

			dst_link_failure(skb);
		} else {
			t->err_count = 0;
		}
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb)
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		/* NOTE(review): from here on the caller's skb pointer refers
		 * to a consumed buffer, yet error returns remain below (the
		 * collect_md encap check and an ip6_tnl_encap() failure).
		 * Confirm that callers which free their skb on error cannot
		 * reach those paths after a reallocation.
		 */
		consume_skb(skb);
		skb = new_skb;
	}

	if (t->parms.collect_md) {
		if (t->encap.type != TUNNEL_ENCAP_NONE)
			goto tx_err_dst_release;
	} else {
		/* Only freshly looked-up routes are stored in the cache. */
		if (use_cache && ndst)
			dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
	}
	skb_dst_set(skb, dst);

	/* A hop limit of 0 means: inherit from the inner header, or use the
	 * route's hop limit for other payloads.
	 */
	if (hop_limit == 0) {
		if (payload_protocol == htons(ETH_P_IP))
			hop_limit = ip_hdr(skb)->ttl;
		else if (payload_protocol == htons(ETH_P_IPV6))
			hop_limit = ipv6_hdr(skb)->hop_limit;
		else
			hop_limit = ip6_dst_hoplimit(dst);
	}

	/* Calculate max headroom for all the headers and adjust
	 * needed_headroom if necessary.
	 */
	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
			+ dst->header_len + t->hlen;
	if (max_headroom > READ_ONCE(dev->needed_headroom))
		WRITE_ONCE(dev->needed_headroom, max_headroom);

	/* The dst reference now belongs to the skb (skb_dst_set() above),
	 * so this return does not go through tx_err_dst_release.
	 */
	err = ip6_tnl_encap(skb, t, &proto, fl6);
	if (err)
		return err;

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_frag_opts(skb, &opt.ops, &proto);
	}

	/* Build the outer IPv6 header. */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, dsfield,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
	ipv6h->hop_limit = hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(NULL, skb, dev);
	return 0;
tx_err_link_failure:
	DEV_STATS_INC(dev, tx_carrier_errors);
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
EXPORT_SYMBOL(ip6_tnl_xmit);

/*
 * ipxip6_tnl_xmit - prepare flow and DS field, then transmit via ip6_tnl_xmit()
 * @skb: packet to send
 * @dev: tunnel device
 * @protocol: inner IP protocol number (IPPROTO_IPIP, IPPROTO_IPV6, ...)
 *
 * Return: 0 on success, -1 on any failure.
 */
static inline int
ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
		u8 protocol)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	const struct iphdr *iph;
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield, orig_dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	/* A tunnel proto of 0 accepts any inner protocol. */
	tproto = READ_ONCE(t->parms.proto);
	if (tproto != protocol && tproto != 0)
		return -1;

	if (t->parms.collect_md) {
		struct ip_tunnel_info *tun_info;
		const struct ip_tunnel_key *key;

		/* The flow is built entirely from the per-packet tunnel
		 * info, which must be TX-mode and AF_INET6.
		 */
		tun_info = skb_tunnel_info(skb);
		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
			     ip_tunnel_info_af(tun_info) != AF_INET6))
			return -1;
		key = &tun_info->key;
		memset(&fl6, 0, sizeof(fl6));
		fl6.flowi6_proto = protocol;
		fl6.saddr = key->u.ipv6.src;
		fl6.daddr = key->u.ipv6.dst;
		fl6.flowlabel = key->label;
		dsfield =  key->tos;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			break;
		default:
			orig_dsfield = dsfield;
			break;
		}
	} else {
		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
			encap_limit =
t->parms.encap_limit;
		if (protocol == IPPROTO_IPV6) {
			offset = ip6_tnl_parse_tlv_enc_lim(skb,
						skb_network_header(skb));
			/* ip6_tnl_parse_tlv_enc_lim() might have
			 * reallocated skb->head
			 */
			if (offset > 0) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				tel = (void *)&skb_network_header(skb)[offset];
				/* An inner limit of 0 forbids further
				 * encapsulation: report a parameter problem
				 * and give up.
				 */
				if (tel->encap_limit == 0) {
					icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
							ICMPV6_HDR_FIELD, offset + 2);
					return -1;
				}
				encap_limit = tel->encap_limit - 1;
			}
		}

		/* Start from the tunnel's flow template. */
		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
		fl6.flowi6_proto = protocol;

		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
			fl6.flowi6_mark = skb->mark;
		else
			fl6.flowi6_mark = t->parms.fwmark;
		switch (protocol) {
		case IPPROTO_IPIP:
			iph = ip_hdr(skb);
			orig_dsfield = ipv4_get_dsfield(iph);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		case IPPROTO_IPV6:
			ipv6h = ipv6_hdr(skb);
			orig_dsfield = ipv6_get_dsfield(ipv6h);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
				dsfield = orig_dsfield;
			else
				dsfield = ip6_tclass(t->parms.flowinfo);
			if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
				fl6.flowlabel |= ip6_flowlabel(ipv6h);
			break;
		default:
			orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
			break;
		}
	}

	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
	dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);

	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
		return -1;

	skb_set_inner_ipproto(skb, protocol);

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   protocol);
	if (err != 0) {
		/* XXX: send ICMP error even if DF is not set. */
		if (err == -EMSGSIZE)
			switch (protocol) {
			case IPPROTO_IPIP:
				icmp_ndo_send(skb, ICMP_DEST_UNREACH,
					      ICMP_FRAG_NEEDED, htonl(mtu));
				break;
			case IPPROTO_IPV6:
				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
				break;
			default:
				break;
			}
		/* Every failure is reported to the caller as -1. */
		return -1;
	}

	return 0;
}

/*
 * ip6_tnl_start_xmit - ndo_start_xmit handler
 *
 * Maps skb->protocol to the inner IP protocol number and transmits through
 * ipxip6_tnl_xmit().  Unsupported protocols, packets whose headers cannot be
 * pulled, IPv6 packets whose source equals the tunnel's remote address, and
 * transmit failures are counted as tx_errors/tx_dropped and freed.
 *
 * Return: always NETDEV_TX_OK.
 */
static netdev_tx_t
ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	u8 ipproto;
	int ret;

	if (!pskb_inet_may_pull(skb))
		goto tx_err;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ipproto = IPPROTO_IPIP;
		break;
	case htons(ETH_P_IPV6):
		if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
			goto tx_err;
		ipproto = IPPROTO_IPV6;
		break;
	case htons(ETH_P_MPLS_UC):
		ipproto = IPPROTO_MPLS;
		break;
	default:
		goto tx_err;
	}

	ret = ipxip6_tnl_xmit(skb, dev, ipproto);
	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	DEV_STATS_INC(dev, tx_errors);
	DEV_STATS_INC(dev, tx_dropped);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

/*
 * ip6_tnl_link_config - derive device state from the tunnel parameters
 *
 * Copies the endpoint addresses into the device address/broadcast fields,
 * rebuilds the flow template, recomputes the capability flags and
 * IFF_POINTOPOINT, and, when transmit is possible, sizes needed_headroom and
 * the MTU from the underlying device.
 */
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
	struct net_device *dev = t->dev;
	struct net_device *tdev = NULL;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;
	int t_hlen;
	int mtu;

	__dev_addr_set(dev, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

	/* Set up flowi template */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	/* Traffic class and flow label are taken from the configured
	 * flowinfo unless they are to be inherited from the inner packet.
	 */
	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	/* Recompute the capability bits from the current addresses. */
	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(t->net,
						 &p->raddr, &p->laddr,
						 p->link, NULL, strict);
		if (rt) {
			tdev = rt->dst.dev;
			ip6_rt_put(rt);
		}

		/* No route: fall back to the configured link device. */
		if (!tdev && p->link)
			tdev = __dev_get_by_index(t->net, p->link);

		if (tdev) {
			dev->needed_headroom = tdev->hard_header_len +
				tdev->needed_headroom + t_hlen;
			mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);

			mtu = mtu - t_hlen;
			/* Leave room for the 8-byte encapsulation limit
			 * option unless it is ignored.
			 */
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				mtu -= 8;

			if (mtu < IPV6_MIN_MTU)
				mtu = IPV6_MIN_MTU;
			WRITE_ONCE(dev->mtu, mtu);
		}
	}
}

/**
 * ip6_tnl_change - update the tunnel parameters
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
 *   ip6_tnl_change() copies the configurable fields of @p into the tunnel
 *   (name and collect_md are left untouched), resets the dst cache and
 *   re-runs ip6_tnl_link_config().
 **/

static void
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	t->parms.fwmark = p->fwmark;
	dst_cache_reset(&t->dst_cache);
	ip6_tnl_link_config(t);
}

/*
 * ip6_tnl_update - apply new parameters to a regular tunnel
 *
 * Unlinks the tunnel from the per-netns tables, waits with synchronize_net(),
 * applies the change, links the tunnel again and sends a netdev state change
 * notification.
 */
static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
}

/* Update the fallback device: only @p->proto is taken over. */
static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	/* for default tnl0 device allow to change only the proto */
	t->parms.proto = p->proto;
	netdev_state_change(t->dev);
}

/*
 * Convert the userspace parameter layout @u into the internal layout @p.
 * Only the fields present in @u are written; other members of @p keep their
 * previous values.  The name is copied with memcpy(), so no NUL terminator is
 * added here.
 */
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
	p->laddr = u->laddr;
	p->raddr = u->raddr;
	p->flags = u->flags;
	p->hop_limit = u->hop_limit;
	p->encap_limit = u->encap_limit;
	p->flowinfo = u->flowinfo;
	p->link = u->link;
	p->proto = u->proto;
	memcpy(p->name, u->name, sizeof(u->name));
}

/* Convert the internal parameter layout back to the userspace layout. */
static void
ip6_tnl_parm_to_user(struct ip6_tnl_parm
*u, const struct __ip6_tnl_parm *p)
{
	u->laddr = p->laddr;
	u->raddr = p->raddr;
	u->flags = p->flags;
	u->hop_limit = p->hop_limit;
	u->encap_limit = p->encap_limit;
	u->flowinfo = p->flowinfo;
	u->link = p->link;
	u->proto = p->proto;
	memcpy(u->name, p->name, sizeof(u->name));
}

/**
 * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace
 *   @dev: virtual device associated with tunnel
 *   @ifr: unused
 *   @data: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
 *   ip6_tnl_siocdevprivate() is used for managing IPv6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   The fallback device "ip6tnl0", created during module
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set, or if
 *     %SIOCDELTUNNEL resolves to the fallback device itself,
 *   %-EINVAL if passed tunnel parameters are invalid or @cmd is unknown,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 *   %-ENOENT if %SIOCDELTUNNEL via the fallback device finds no tunnel,
 *   or the error reported by ip6_tnl_locate() for add/change requests
 **/

static int
ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
		       void __user *data, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	memset(&p1, 0, sizeof(p1));

	switch (cmd) {
	case SIOCGETTUNNEL:
		/* Through the fallback device the request names the tunnel
		 * to query; if none is found the fallback device's own
		 * parameters are reported.
		 */
		if (dev == ip6n->fb_tnl_dev) {
			if (copy_from_user(&p, data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				t = netdev_priv(dev);
		} else {
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(data, &p, sizeof(p)))
			err = -EFAULT;
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, data, sizeof(p)))
			break;
		err = -EINVAL;
		/* Only IPv6, IPv4 or "any" (0) are accepted here. */
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		ip6_tnl_parm_from_user(&p1, &p);
		/* The last argument asks for creation on SIOCADDTUNNEL. */
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (cmd == SIOCCHGTUNNEL) {
			if (!IS_ERR(t)) {
				/* The parameters already belong to another
				 * tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
			if (dev == ip6n->fb_tnl_dev)
				ip6_tnl0_update(t, &p1);
			else
				ip6_tnl_update(t, &p1);
		}
		if (!IS_ERR(t)) {
			err = 0;
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(data, &p, sizeof(p)))
				err = -EFAULT;

		} else {
			err = PTR_ERR(t);
		}
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		/* Through the fallback device the request names the tunnel
		 * to delete; the fallback device itself cannot be deleted.
		 */
		if (dev == ip6n->fb_tnl_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, data, sizeof(p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				break;
			err = -EPERM;
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}

/**
 * ip6_tnl_change_mtu - change mtu manually for tunnel device
 *   @dev: virtual device associated with tunnel
 *   @new_mtu: the new mtu
 *
 * Description:
 *   The lower bound is %IPV6_MIN_MTU for IPv6-only tunnels and %ETH_MIN_MTU
 *   otherwise.  The upper bound is %IP6_MAX_MTU (proto IPv6 or 0) or
 *   %IP_MAX_MTU (other protos), minus the device hard header length and the
 *   tunnel header length.
 *
 * Return:
 *   0 on success,
 *   %-EINVAL if mtu too small or too large
 **/

int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
	struct ip6_tnl *tnl = netdev_priv(dev);
	int t_hlen;

	t_hlen = tnl->hlen + sizeof(struct ipv6hdr);

	if (tnl->parms.proto == IPPROTO_IPV6) {
		if (new_mtu < IPV6_MIN_MTU)
			return -EINVAL;
	} else {
		if (new_mtu < ETH_MIN_MTU)
			return -EINVAL;
	}
	if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
		if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen)
			return -EINVAL;
	} else {
		if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen)
			return -EINVAL;
	}
	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);

/* ndo_get_iflink handler: report the configured link index of the tunnel. */
int ip6_tnl_get_iflink(const struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	return
READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);

/*
 * ip6_tnl_encap_add_ops - register encapsulation ops in slot @num
 *
 * Return: 0 when the slot was empty and now holds @ops, -ERANGE when @num is
 * out of range, -1 when the slot was already occupied.
 */
int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
			  unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip6_tnl_encap_ops **)
			&ip6tun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip6_tnl_encap_add_ops);

/*
 * ip6_tnl_encap_del_ops - unregister encapsulation ops from slot @num
 *
 * Clears the slot only if it currently holds @ops, then calls
 * synchronize_net() in either case.
 *
 * Return: 0 when @ops was removed, -ERANGE when @num is out of range,
 * -1 when the slot held something else.
 */
int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
			  unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
		       &ip6tun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip6_tnl_encap_del_ops);

/*
 * ip6_tnl_encap_setup - install encapsulation settings on a tunnel
 *
 * Clears t->encap first, so on failure the tunnel is left with zeroed
 * encapsulation settings.  On success the settings are copied from @ipencap
 * and the header lengths are updated.
 *
 * Return: 0 on success, or the negative value from ip6_encap_hlen().
 */
int ip6_tnl_encap_setup(struct ip6_tnl *t,
			struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip6_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);

/* Device operations shared by all ip6tnl devices. */
static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init	= ip6_tnl_dev_init,
	.ndo_uninit	= ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_start_xmit,
	.ndo_siocdevprivate = ip6_tnl_siocdevprivate,
	.ndo_change_mtu = ip6_tnl_change_mtu,
	.ndo_get_stats64 = dev_get_tstats64,
	.ndo_get_iflink = ip6_tnl_get_iflink,
};

/* Feature bits set in both features and hw_features by ip6_tnl_dev_setup(). */
#define IPXIPX_FEATURES (NETIF_F_SG |		\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_GSO_SOFTWARE |	\
			 NETIF_F_HW_CSUM)

/**
 * ip6_tnl_dev_setup - setup virtual tunnel device
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/

static void ip6_tnl_dev_setup(struct net_device *dev)
{
	dev->netdev_ops = &ip6_tnl_netdev_ops;
	dev->header_ops = &ip_tunnel_header_ops;
	dev->needs_free_netdev = true;
	dev->priv_destructor = ip6_dev_free;

	dev->type = ARPHRD_TUNNEL6;
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
	dev->lltx =
true;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
	netif_keep_dst(dev);

	dev->features		|= IPXIPX_FEATURES;
	dev->hw_features	|= IPXIPX_FEATURES;

	/* This perm addr will be used as interface identifier by IPv6 */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
}

/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initializes the dst cache and GRO cells, computes the header lengths and
 *   the default/min/max MTU, and takes a tracked reference on @dev.
 *
 * Return: 0 on success, or the error from dst_cache_init() or
 *   gro_cells_init(); the dst cache is destroyed again if the latter fails.
 **/

static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int ret;
	int t_hlen;

	t->dev = dev;

	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
	if (ret)
		return ret;

	ret = gro_cells_init(&t->gro_cells, dev);
	if (ret)
		goto destroy_dst;

	t->tun_hlen = 0;
	t->hlen = t->encap_hlen + t->tun_hlen;
	t_hlen = t->hlen + sizeof(struct ipv6hdr);

	dev->type = ARPHRD_TUNNEL6;
	dev->mtu = ETH_DATA_LEN - t_hlen;
	/* Leave room for the 8-byte encapsulation limit option unless it
	 * is ignored.
	 */
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu -= 8;
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen;

	netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
	netdev_lockdep_set_classes(dev);
	return 0;

destroy_dst:
	dst_cache_destroy(&t->dst_cache);

	return ret;
}

/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Return: 0 on success, or the error from ip6_tnl_dev_init_gen()
 **/

static int ip6_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int err = ip6_tnl_dev_init_gen(dev);

	if (err)
		return err;
	ip6_tnl_link_config(t);
	if (t->parms.collect_md)
		netif_keep_dst(dev);
	return 0;
}

/**
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
 *   @dev: fallback device
 *
 * Description:
 *   Binds the tunnel to the device's netns, sets its proto to IPv6 and
 *   publishes it as the first wildcard tunnel entry.
 *
 * Return: 0
 **/

static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	t->net = net;
	t->parms.proto = IPPROTO_IPV6;

	rcu_assign_pointer(ip6n->tnls_wc[0], t);
	return 0;
}

/*
 * rtnl validate callback: when IFLA_IPTUN_PROTO is present it must be
 * IPPROTO_IPV6, IPPROTO_IPIP or 0; anything else yields -EINVAL.
 */
static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
			    struct
netlink_ext_ack *extack)
{
	u8 proto;

	if (!data || !data[IFLA_IPTUN_PROTO])
		return 0;

	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
	if (proto != IPPROTO_IPV6 &&
	    proto != IPPROTO_IPIP &&
	    proto != 0)
		return -EINVAL;

	return 0;
}

/*
 * ip6_tnl_netlink_parms - translate IFLA_IPTUN_* attributes into @parms
 *
 * @parms is zeroed first, so every attribute that is absent (or all of them
 * when @data is NULL) leaves its field at zero.
 */
static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);

	if (data[IFLA_IPTUN_REMOTE])
		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);

	/* Flag attribute: presence alone enables metadata collection. */
	if (data[IFLA_IPTUN_COLLECT_METADATA])
		parms->collect_md = true;

	if (data[IFLA_IPTUN_FWMARK])
		parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
}

/*
 * ip6_tnl_newlink - rtnl newlink callback
 *
 * Applies encapsulation and tunnel parameters from netlink, refuses
 * duplicates with -EEXIST (a second collect_md tunnel in the netns, or
 * parameters matching an existing tunnel), then creates the device and
 * applies IFLA_MTU if given.
 */
static int ip6_tnl_newlink(struct net_device *dev,
			   struct rtnl_newlink_params *params,
			   struct netlink_ext_ack *extack)
{
	struct nlattr **data = params->data;
	struct nlattr **tb = params->tb;
	struct ip_tunnel_encap ipencap;
	struct ip6_tnl_net *ip6n;
	struct ip6_tnl *nt, *t;
	struct net *net;
	int err;

	/* Prefer the link netns when one is supplied. */
	net = params->link_net ?
: dev_net(dev);
	ip6n = net_generic(net, ip6_tnl_net_id);
	nt = netdev_priv(dev);
	nt->net = net;

	if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
		err = ip6_tnl_encap_setup(nt, &ipencap);
		if (err < 0)
			return err;
	}

	ip6_tnl_netlink_parms(data, &nt->parms);

	if (nt->parms.collect_md) {
		if (rtnl_dereference(ip6n->collect_md_tun))
			return -EEXIST;
	} else {
		t = ip6_tnl_locate(net, &nt->parms, 0);
		if (!IS_ERR(t))
			return -EEXIST;
	}

	err = ip6_tnl_create2(dev);
	/* The result of the MTU change is not propagated to the caller. */
	if (!err && tb[IFLA_MTU])
		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));

	return err;
}

/*
 * ip6_tnl_changelink - rtnl changelink callback
 *
 * Return: 0 on success; -EINVAL for the fallback device or when collect_md
 * is requested; -EEXIST when the new parameters match a different tunnel;
 * or the error from ip6_tnl_encap_setup().
 *
 * NOTE(review): the encapsulation settings are applied before the collect_md
 * and conflict checks, so a request rejected by those checks appears to leave
 * the new encapsulation in place - confirm this is intended.
 */
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[],
			      struct netlink_ext_ack *extack)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm p;
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip_tunnel_encap ipencap;

	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
		int err = ip6_tnl_encap_setup(t, &ipencap);

		if (err < 0)
			return err;
	}
	ip6_tnl_netlink_parms(data, &p);
	if (p.collect_md)
		return -EINVAL;

	t = ip6_tnl_locate(net, &p, 0);
	if (!IS_ERR(t)) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	ip6_tnl_update(t, &p);
	return 0;
}

/* rtnl dellink callback: queue the device for removal; the fallback device
 * is never removed this way.
 */
static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev != ip6n->fb_tnl_dev)
		unregister_netdevice_queue(dev, head);
}

/* Netlink space needed for the attributes written by ip6_tnl_fill_info(). */
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_TYPE */
		nla_total_size(2)
+
		/* IFLA_IPTUN_ENCAP_FLAGS */
		nla_total_size(2) +
		/* IFLA_IPTUN_ENCAP_SPORT */
		nla_total_size(2) +
		/* IFLA_IPTUN_ENCAP_DPORT */
		nla_total_size(2) +
		/* IFLA_IPTUN_COLLECT_METADATA */
		nla_total_size(0) +
		/* IFLA_IPTUN_FWMARK */
		nla_total_size(4) +
		0;
}

/*
 * ip6_tnl_fill_info - dump the tunnel configuration as netlink attributes
 *
 * Writes the tunnel parameters, the encapsulation settings and, for
 * collect_md tunnels, the IFLA_IPTUN_COLLECT_METADATA flag.
 *
 * Return: 0 on success, -EMSGSIZE if any attribute could not be added.
 */
static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
	    nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
		goto nla_put_failure;

	if (parm->collect_md)
		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
			goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* Return the network namespace stored in the tunnel (tunnel->net). */
struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);

	return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);

/* Netlink attribute policy for the IFLA_IPTUN_* attributes. */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
[IFLA_IPTUN_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_IPTUN_FWMARK]		= { .type = NLA_U32 },
};

/* rtnetlink operations for the "ip6tnl" link kind. */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6_tnl_dev_setup,
	.validate	= ip6_tnl_validate,
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	.dellink	= ip6_tnl_dellink,
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
	.get_link_net	= ip6_tnl_get_link_net,
};

/* xfrm6 tunnel handlers, one per supported payload family. */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	=	1,
};

static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	=	1,
};

static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
	.handler	= mplsip6_rcv,
	.err_handler	= mplsip6_err,
	.priority	=	1,
};

/*
 * ip6_tnl_exit_rtnl_net - per-netns teardown
 *
 * Queues every ip6tnl device living in @net for unregistration, then walks
 * the tunnel tables of @net and queues the tunnels whose device lives in a
 * different netns.
 */
static void __net_exit ip6_tnl_exit_rtnl_net(struct net *net, struct list_head *list)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *dev, *aux;
	int h;
	struct ip6_tnl *t;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &ip6_link_ops)
			unregister_netdevice_queue(dev, list);

	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
		t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]);
		while (t) {
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, list);

			t = rtnl_net_dereference(net, t->next);
		}
	}

	t = rtnl_net_dereference(net, ip6n->tnls_wc[0]);
	while (t) {
		/* If dev is in the same netns, it has already
		 * been added to the list by the previous loop.
		 */
		if (!net_eq(dev_net(t->dev), net))
			unregister_netdevice_queue(t->dev, list);

		t = rtnl_net_dereference(net, t->next);
	}
}

/*
 * ip6_tnl_init_net - per-netns setup
 *
 * Wires up the tunnel table pointers and, when the netns is allowed fallback
 * tunnels, allocates, initializes and registers the "ip6tnl0" fallback
 * device.
 *
 * Return: 0 on success (including when no fallback device is created),
 * -ENOMEM if the device cannot be allocated, or the error from
 * ip6_fb_tnl_dev_init() / register_netdev(); the device is freed on those
 * error paths.
 */
static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip6_tnl *t = NULL;
	int err;

	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	if (!net_has_fallback_tunnels(net))
		return 0;
	err = -ENOMEM;
	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					NET_NAME_UNKNOWN, ip6_tnl_dev_setup);

	if (!ip6n->fb_tnl_dev)
		goto err_alloc_dev;
	dev_net_set(ip6n->fb_tnl_dev, net);
	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	ip6n->fb_tnl_dev->netns_immutable = true;

	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	err = register_netdev(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	/* Record the registered device name in the tunnel parameters. */
	t = netdev_priv(ip6n->fb_tnl_dev);

	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
	return 0;

err_register:
	free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
	return err;
}

/* Per-netns hooks and private data size for the ip6 tunnel module. */
static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit_rtnl = ip6_tnl_exit_rtnl_net,
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};

/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Return: 0 on success
 **/

static int __init ip6_tunnel_init(void)
{
	int  err;

	if (!ipv6_mod_enabled())
		return -EOPNOTSUPP;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out_pernet;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto out_ip4ip6;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}

	if (ip6_tnl_mpls_supported()) {
		err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
		if (err < 0) {
			pr_err("%s: can't register mplsip6\n", __func__);
goto out_mplsip6; } } err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; return 0; rtnl_link_failed: if (ip6_tnl_mpls_supported()) xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: unregister_pernet_device(&ip6_tnl_net_ops); out_pernet: return err; } /** * ip6_tunnel_cleanup - free resources and unregister protocol **/ static void __exit ip6_tunnel_cleanup(void) { rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); if (ip6_tnl_mpls_supported() && xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } module_init(ip6_tunnel_init); module_exit(ip6_tunnel_cleanup);
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_IOBITMAP_H
#define _ASM_X86_IOBITMAP_H

#include <linux/refcount.h>
#include <asm/processor.h>

/*
 * I/O permission bitmap object.
 *
 * NOTE(review): how @sequence and @refcnt are used is not visible in this
 * header; see the users of struct io_bitmap for their exact semantics.
 */
struct io_bitmap {
	u64		sequence;
	/* Reference count of this bitmap object */
	refcount_t	refcnt;
	/* The maximum number of bytes to copy so all zero bits are covered */
	unsigned int	max;
	/* The bitmap itself, IO_BITMAP_LONGS longs in size */
	unsigned long	bitmap[IO_BITMAP_LONGS];
};

struct task_struct;

#ifdef CONFIG_X86_IOPL_IOPERM
/* Implemented out of line when I/O permission support is configured in */
void io_bitmap_share(struct task_struct *tsk);
void io_bitmap_exit(struct task_struct *tsk);

/*
 * Write IO_BITMAP_OFFSET_INVALID into the io_bitmap_base field of this
 * CPU's TSS (cpu_tss_rw).
 */
static inline void native_tss_invalidate_io_bitmap(void)
{
	/*
	 * Invalidate the I/O bitmap by moving io_bitmap_base outside the
	 * TSS limit so any subsequent I/O access from user space will
	 * trigger a #GP.
	 *
	 * This is correct even when VMEXIT rewrites the TSS limit
	 * to 0x67 as the only requirement is that the base points
	 * outside the limit.
	 */
	this_cpu_write(cpu_tss_rw.x86_tss.io_bitmap_base,
		       IO_BITMAP_OFFSET_INVALID);
}

void native_tss_update_io_bitmap(void);

/*
 * Without CONFIG_PARAVIRT_XXL the tss_*() names map straight onto the
 * native implementations; with it they are presumably provided by
 * <asm/paravirt.h> (not visible here).
 */
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
#define tss_update_io_bitmap native_tss_update_io_bitmap
#define tss_invalidate_io_bitmap native_tss_invalidate_io_bitmap
#endif

#else
/* Empty stubs used when CONFIG_X86_IOPL_IOPERM is not set */
static inline void io_bitmap_share(struct task_struct *tsk) { }
static inline void io_bitmap_exit(struct task_struct *tsk) { }
static inline void tss_update_io_bitmap(void) { }
#endif

#endif
19 5 62 13 103 47 62 62 62 62 62 21 101 59 4 5 4 7 7 21 17 10 11 20 10 21 21 23 66 65 63 42 23 16 5 96 96 99 99 35 73 99 9 96 99 13 13 3 3 3 1 3 21 14 10 28 12 16 14 15 28 9 2 13 21 3 3 3 28 28 37 38 38 155 155 153 152 155 155 37 37 2 37 37 47 47 47 3 46 46 47 47 47 37 37 2 1 1 4 1 4 4 3 3 3 1 3 3 3 3 2 1 1 1 1 1 3 3 1 1 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 
945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 
1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 // SPDX-License-Identifier: GPL-2.0-or-later /* * Loopback soundcard * * Original code: * Copyright (c) by Jaroslav Kysela <perex@perex.cz> * * More accurate positioning and full-duplex support: * Copyright (c) Ahmet İnan <ainan at mathematik.uni-freiburg.de> * * Major (almost complete) rewrite: * Copyright (c) by Takashi Iwai <tiwai@suse.de> * * A next major update in 2010 (separate timers for playback and capture): * Copyright (c) Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/jiffies.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/wait.h> #include <linux/module.h> #include <linux/platform_device.h> #include <sound/core.h> #include <sound/control.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/info.h> #include <sound/initval.h> #include <sound/timer.h> MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("A loopback soundcard"); MODULE_LICENSE("GPL"); #define MAX_PCM_SUBSTREAMS 8 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-MAX */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* ID for this card */ static bool enable[SNDRV_CARDS] = {1, [1 ... 
/*
 * Timer backend operations of a loopback cable.
 *
 * Callbacks marked "required" are invoked without a NULL check; callbacks
 * marked "optional" may be left NULL.
 */
struct loopback_ops {
	/* optional
	 * call in loopback->cable_lock
	 */
	int (*open)(struct loopback_pcm *dpcm);
	/* required
	 * call in cable->lock
	 * Invoked from the trigger callback for START, PAUSE_RELEASE and
	 * RESUME; its return value becomes the trigger result.
	 */
	int (*start)(struct loopback_pcm *dpcm);
	/* required
	 * call in cable->lock
	 * Invoked from the trigger callback for STOP, PAUSE_PUSH and
	 * SUSPEND; its return value becomes the trigger result.
	 */
	int (*stop)(struct loopback_pcm *dpcm);
	/* optional
	 * Invoked at the start of the prepare callback; a negative return
	 * value aborts prepare with that error.
	 */
	int (*stop_sync)(struct loopback_pcm *dpcm);
	/* optional */
	int (*close_substream)(struct loopback_pcm *dpcm);
	/* optional
	 * call in loopback->cable_lock
	 */
	int (*close_cable)(struct loopback_pcm *dpcm);
	/* optional
	 * call in cable->lock
	 * The jiffies implementation returns the bit mask of streams that
	 * are running and not paused.
	 */
	unsigned int (*pos_update)(struct loopback_cable *cable);
	/* optional */
	void (*dpcm_info)(struct loopback_pcm *dpcm,
			  struct snd_info_buffer *buffer);
};
/*
 * Per-substream state of one end (playback or capture) of a loopback cable.
 */
struct loopback_pcm {
	struct loopback *loopback;		/* owning loopback card state */
	struct snd_pcm_substream *substream;	/* PCM substream this state belongs to */
	struct loopback_cable *cable;		/* cable this substream is attached to */
	/* ring buffer size in bytes, set by the prepare callback */
	unsigned int pcm_buffer_size;
	unsigned int buf_pos;	/* position in buffer */
	/* number of bytes of the capture buffer currently holding silence;
	 * reset to 0 whenever playback data is copied in
	 */
	unsigned int silent_size;
	/* PCM parameters */
	unsigned int pcm_period_size;	/* period size in bytes */
	unsigned int pcm_bps;		/* bytes per second */
	unsigned int pcm_salign;	/* bytes per sample * channels */
	unsigned int pcm_rate_shift;	/* rate shift value */
	/* flags */
	/* set when irq_pos crossed a period boundary; cleared by the jiffies
	 * timer handler right before it reports the elapsed period
	 */
	unsigned int period_update_pending :1;
	/* timer stuff */
	unsigned int irq_pos;		/* fractional IRQ position in jiffies
					 * ticks
					 */
	unsigned int period_size_frac;	/* period size in jiffies ticks */
	/* byte count subtracted once from the next position delta */
	unsigned int last_drift;
	/* jiffies value at the last position update */
	unsigned long last_jiffies;
	/* If jiffies timer is used */
	struct timer_list timer;

	/* size of per channel buffer in case of non-interleaved access */
	unsigned int channel_buf_n;
};
dpcm->pcm_salign); } static inline unsigned int frac_pos(struct loopback_pcm *dpcm, unsigned int x) { if (dpcm->pcm_rate_shift == NO_PITCH) { /* no pitch */ return x * HZ; } else { x = div_u64(dpcm->pcm_rate_shift * (unsigned long long)x * HZ, NO_PITCH); } return x; } static inline struct loopback_setup *get_setup(struct loopback_pcm *dpcm) { int device = dpcm->substream->pstr->pcm->device; if (dpcm->substream->stream == SNDRV_PCM_STREAM_PLAYBACK) device ^= 1; return &dpcm->loopback->setup[dpcm->substream->number][device]; } static inline unsigned int get_notify(struct loopback_pcm *dpcm) { return get_setup(dpcm)->notify; } static inline unsigned int get_rate_shift(struct loopback_pcm *dpcm) { return get_setup(dpcm)->rate_shift; } /* call in cable->lock */ static int loopback_jiffies_timer_start(struct loopback_pcm *dpcm) { unsigned long tick; unsigned int rate_shift = get_rate_shift(dpcm); if (rate_shift != dpcm->pcm_rate_shift) { dpcm->pcm_rate_shift = rate_shift; dpcm->period_size_frac = frac_pos(dpcm, dpcm->pcm_period_size); } if (dpcm->period_size_frac <= dpcm->irq_pos) { dpcm->irq_pos %= dpcm->period_size_frac; dpcm->period_update_pending = 1; } tick = dpcm->period_size_frac - dpcm->irq_pos; tick = DIV_ROUND_UP(tick, dpcm->pcm_bps); mod_timer(&dpcm->timer, jiffies + tick); return 0; } /* call in cable->lock */ static int loopback_snd_timer_start(struct loopback_pcm *dpcm) { struct loopback_cable *cable = dpcm->cable; int err; /* Loopback device has to use same period as timer card. Therefore * wake up for each snd_pcm_period_elapsed() call of timer card. */ err = snd_timer_start(cable->snd_timer.instance, 1); if (err < 0) { /* do not report error if trying to start but already * running. 
For example called by opposite substream * of the same cable */ if (err == -EBUSY) return 0; pcm_err(dpcm->substream->pcm, "snd_timer_start(%d,%d,%d) failed with %d", cable->snd_timer.id.card, cable->snd_timer.id.device, cable->snd_timer.id.subdevice, err); } return err; } /* call in cable->lock */ static inline int loopback_jiffies_timer_stop(struct loopback_pcm *dpcm) { timer_delete(&dpcm->timer); dpcm->timer.expires = 0; return 0; } /* call in cable->lock */ static int loopback_snd_timer_stop(struct loopback_pcm *dpcm) { struct loopback_cable *cable = dpcm->cable; int err; /* only stop if both devices (playback and capture) are not running */ if (cable->running ^ cable->pause) return 0; err = snd_timer_stop(cable->snd_timer.instance); if (err < 0) { pcm_err(dpcm->substream->pcm, "snd_timer_stop(%d,%d,%d) failed with %d", cable->snd_timer.id.card, cable->snd_timer.id.device, cable->snd_timer.id.subdevice, err); } return err; } static inline int loopback_jiffies_timer_stop_sync(struct loopback_pcm *dpcm) { timer_delete_sync(&dpcm->timer); return 0; } /* call in loopback->cable_lock */ static int loopback_snd_timer_close_cable(struct loopback_pcm *dpcm) { struct loopback_cable *cable = dpcm->cable; /* snd_timer was not opened */ if (!cable->snd_timer.instance) return 0; /* will only be called from free_cable() when other stream was * already closed. Other stream cannot be reopened as long as * loopback->cable_lock is locked. 
Therefore no need to lock * cable->lock; */ snd_timer_close(cable->snd_timer.instance); /* wait till drain work has finished if requested */ cancel_work_sync(&cable->snd_timer.event_work); snd_timer_instance_free(cable->snd_timer.instance); memset(&cable->snd_timer, 0, sizeof(cable->snd_timer)); return 0; } static bool is_access_interleaved(snd_pcm_access_t access) { switch (access) { case SNDRV_PCM_ACCESS_MMAP_INTERLEAVED: case SNDRV_PCM_ACCESS_RW_INTERLEAVED: return true; default: return false; } }; static int loopback_check_format(struct loopback_cable *cable, int stream) { struct snd_pcm_runtime *runtime, *cruntime; struct loopback_setup *setup; struct snd_card *card; int check; if (cable->valid != CABLE_VALID_BOTH) { if (stream == SNDRV_PCM_STREAM_PLAYBACK) goto __notify; return 0; } runtime = cable->streams[SNDRV_PCM_STREAM_PLAYBACK]-> substream->runtime; cruntime = cable->streams[SNDRV_PCM_STREAM_CAPTURE]-> substream->runtime; check = runtime->format != cruntime->format || runtime->rate != cruntime->rate || runtime->channels != cruntime->channels || is_access_interleaved(runtime->access) != is_access_interleaved(cruntime->access); if (!check) return 0; if (stream == SNDRV_PCM_STREAM_CAPTURE) { return -EIO; } else { snd_pcm_stop(cable->streams[SNDRV_PCM_STREAM_CAPTURE]-> substream, SNDRV_PCM_STATE_DRAINING); __notify: runtime = cable->streams[SNDRV_PCM_STREAM_PLAYBACK]-> substream->runtime; setup = get_setup(cable->streams[SNDRV_PCM_STREAM_PLAYBACK]); card = cable->streams[SNDRV_PCM_STREAM_PLAYBACK]->loopback->card; if (setup->format != runtime->format) { snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &setup->format_id); setup->format = runtime->format; } if (setup->rate != runtime->rate) { snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &setup->rate_id); setup->rate = runtime->rate; } if (setup->channels != runtime->channels) { snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &setup->channels_id); setup->channels = runtime->channels; } if 
(is_access_interleaved(setup->access) != is_access_interleaved(runtime->access)) { snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &setup->access_id); setup->access = runtime->access; } } return 0; } static void loopback_active_notify(struct loopback_pcm *dpcm) { snd_ctl_notify(dpcm->loopback->card, SNDRV_CTL_EVENT_MASK_VALUE, &get_setup(dpcm)->active_id); } static int loopback_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback_pcm *dpcm = runtime->private_data; struct loopback_cable *cable = dpcm->cable; int err = 0, stream = 1 << substream->stream; switch (cmd) { case SNDRV_PCM_TRIGGER_START: err = loopback_check_format(cable, substream->stream); if (err < 0) return err; dpcm->last_jiffies = jiffies; dpcm->pcm_rate_shift = 0; dpcm->last_drift = 0; spin_lock(&cable->lock); cable->running |= stream; cable->pause &= ~stream; err = cable->ops->start(dpcm); spin_unlock(&cable->lock); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) loopback_active_notify(dpcm); break; case SNDRV_PCM_TRIGGER_STOP: spin_lock(&cable->lock); cable->running &= ~stream; cable->pause &= ~stream; err = cable->ops->stop(dpcm); spin_unlock(&cable->lock); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) loopback_active_notify(dpcm); break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: case SNDRV_PCM_TRIGGER_SUSPEND: spin_lock(&cable->lock); cable->pause |= stream; err = cable->ops->stop(dpcm); spin_unlock(&cable->lock); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) loopback_active_notify(dpcm); break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: spin_lock(&cable->lock); dpcm->last_jiffies = jiffies; cable->pause &= ~stream; err = cable->ops->start(dpcm); spin_unlock(&cable->lock); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) loopback_active_notify(dpcm); break; default: return -EINVAL; } return err; } static void params_change(struct snd_pcm_substream *substream) { struct snd_pcm_runtime 
*runtime = substream->runtime; struct loopback_pcm *dpcm = runtime->private_data; struct loopback_cable *cable = dpcm->cable; cable->hw.formats = pcm_format_to_bits(runtime->format); cable->hw.rate_min = runtime->rate; cable->hw.rate_max = runtime->rate; cable->hw.channels_min = runtime->channels; cable->hw.channels_max = runtime->channels; if (cable->snd_timer.instance) { cable->hw.period_bytes_min = frames_to_bytes(runtime, runtime->period_size); cable->hw.period_bytes_max = cable->hw.period_bytes_min; } } static int loopback_prepare(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback_pcm *dpcm = runtime->private_data; struct loopback_cable *cable = dpcm->cable; int err, bps, salign; if (cable->ops->stop_sync) { err = cable->ops->stop_sync(dpcm); if (err < 0) return err; } salign = (snd_pcm_format_physical_width(runtime->format) * runtime->channels) / 8; bps = salign * runtime->rate; if (bps <= 0 || salign <= 0) return -EINVAL; dpcm->buf_pos = 0; dpcm->pcm_buffer_size = frames_to_bytes(runtime, runtime->buffer_size); dpcm->channel_buf_n = dpcm->pcm_buffer_size / runtime->channels; if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { /* clear capture buffer */ dpcm->silent_size = dpcm->pcm_buffer_size; snd_pcm_format_set_silence(runtime->format, runtime->dma_area, runtime->buffer_size * runtime->channels); } dpcm->irq_pos = 0; dpcm->period_update_pending = 0; dpcm->pcm_bps = bps; dpcm->pcm_salign = salign; dpcm->pcm_period_size = frames_to_bytes(runtime, runtime->period_size); mutex_lock(&dpcm->loopback->cable_lock); if (!(cable->valid & ~(1 << substream->stream)) || (get_setup(dpcm)->notify && substream->stream == SNDRV_PCM_STREAM_PLAYBACK)) params_change(substream); cable->valid |= 1 << substream->stream; mutex_unlock(&dpcm->loopback->cable_lock); return 0; } static void clear_capture_buf(struct loopback_pcm *dpcm, unsigned int bytes) { struct snd_pcm_runtime *runtime = dpcm->substream->runtime; char *dst 
= runtime->dma_area; unsigned int dst_off = dpcm->buf_pos; if (dpcm->silent_size >= dpcm->pcm_buffer_size) return; if (dpcm->silent_size + bytes > dpcm->pcm_buffer_size) bytes = dpcm->pcm_buffer_size - dpcm->silent_size; for (;;) { unsigned int size = bytes; if (dst_off + size > dpcm->pcm_buffer_size) size = dpcm->pcm_buffer_size - dst_off; snd_pcm_format_set_silence(runtime->format, dst + dst_off, bytes_to_frames(runtime, size) * runtime->channels); dpcm->silent_size += size; bytes -= size; if (!bytes) break; dst_off = 0; } } static void copy_play_buf_part_n(struct loopback_pcm *play, struct loopback_pcm *capt, unsigned int size, unsigned int src_off, unsigned int dst_off) { unsigned int channels = capt->substream->runtime->channels; unsigned int size_p_ch = size / channels; unsigned int src_off_ch = src_off / channels; unsigned int dst_off_ch = dst_off / channels; int i; for (i = 0; i < channels; i++) { memcpy(capt->substream->runtime->dma_area + capt->channel_buf_n * i + dst_off_ch, play->substream->runtime->dma_area + play->channel_buf_n * i + src_off_ch, size_p_ch); } } static void copy_play_buf(struct loopback_pcm *play, struct loopback_pcm *capt, unsigned int bytes) { struct snd_pcm_runtime *runtime = play->substream->runtime; char *src = runtime->dma_area; char *dst = capt->substream->runtime->dma_area; unsigned int src_off = play->buf_pos; unsigned int dst_off = capt->buf_pos; unsigned int clear_bytes = 0; /* check if playback is draining, trim the capture copy size * when our pointer is at the end of playback ring buffer */ if (runtime->state == SNDRV_PCM_STATE_DRAINING && snd_pcm_playback_hw_avail(runtime) < runtime->buffer_size) { snd_pcm_uframes_t appl_ptr, appl_ptr1, diff; appl_ptr = appl_ptr1 = runtime->control->appl_ptr; appl_ptr1 -= appl_ptr1 % runtime->buffer_size; appl_ptr1 += play->buf_pos / play->pcm_salign; if (appl_ptr < appl_ptr1) appl_ptr1 -= runtime->buffer_size; diff = (appl_ptr - appl_ptr1) * play->pcm_salign; if (diff < bytes) { 
clear_bytes = bytes - diff; bytes = diff; } } for (;;) { unsigned int size = bytes; if (src_off + size > play->pcm_buffer_size) size = play->pcm_buffer_size - src_off; if (dst_off + size > capt->pcm_buffer_size) size = capt->pcm_buffer_size - dst_off; if (!is_access_interleaved(runtime->access)) copy_play_buf_part_n(play, capt, size, src_off, dst_off); else memcpy(dst + dst_off, src + src_off, size); capt->silent_size = 0; bytes -= size; if (!bytes) break; src_off = (src_off + size) % play->pcm_buffer_size; dst_off = (dst_off + size) % capt->pcm_buffer_size; } if (clear_bytes > 0) { clear_capture_buf(capt, clear_bytes); capt->silent_size = 0; } } static inline unsigned int bytepos_delta(struct loopback_pcm *dpcm, unsigned int jiffies_delta) { unsigned long last_pos; unsigned int delta; last_pos = byte_pos(dpcm, dpcm->irq_pos); dpcm->irq_pos += jiffies_delta * dpcm->pcm_bps; delta = byte_pos(dpcm, dpcm->irq_pos) - last_pos; if (delta >= dpcm->last_drift) delta -= dpcm->last_drift; dpcm->last_drift = 0; if (dpcm->irq_pos >= dpcm->period_size_frac) { dpcm->irq_pos %= dpcm->period_size_frac; dpcm->period_update_pending = 1; } return delta; } static inline void bytepos_finish(struct loopback_pcm *dpcm, unsigned int delta) { dpcm->buf_pos += delta; dpcm->buf_pos %= dpcm->pcm_buffer_size; } /* call in cable->lock */ static unsigned int loopback_jiffies_timer_pos_update (struct loopback_cable *cable) { struct loopback_pcm *dpcm_play = cable->streams[SNDRV_PCM_STREAM_PLAYBACK]; struct loopback_pcm *dpcm_capt = cable->streams[SNDRV_PCM_STREAM_CAPTURE]; unsigned long delta_play = 0, delta_capt = 0, cur_jiffies; unsigned int running, count1, count2; cur_jiffies = jiffies; running = cable->running ^ cable->pause; if (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) { delta_play = cur_jiffies - dpcm_play->last_jiffies; dpcm_play->last_jiffies += delta_play; } if (running & (1 << SNDRV_PCM_STREAM_CAPTURE)) { delta_capt = cur_jiffies - dpcm_capt->last_jiffies; dpcm_capt->last_jiffies 
+= delta_capt; } if (delta_play == 0 && delta_capt == 0) goto unlock; if (delta_play > delta_capt) { count1 = bytepos_delta(dpcm_play, delta_play - delta_capt); bytepos_finish(dpcm_play, count1); delta_play = delta_capt; } else if (delta_play < delta_capt) { count1 = bytepos_delta(dpcm_capt, delta_capt - delta_play); clear_capture_buf(dpcm_capt, count1); bytepos_finish(dpcm_capt, count1); delta_capt = delta_play; } if (delta_play == 0 && delta_capt == 0) goto unlock; /* note delta_capt == delta_play at this moment */ count1 = bytepos_delta(dpcm_play, delta_play); count2 = bytepos_delta(dpcm_capt, delta_capt); if (count1 < count2) { dpcm_capt->last_drift = count2 - count1; count1 = count2; } else if (count1 > count2) { dpcm_play->last_drift = count1 - count2; } copy_play_buf(dpcm_play, dpcm_capt, count1); bytepos_finish(dpcm_play, count1); bytepos_finish(dpcm_capt, count1); unlock: return running; } static void loopback_jiffies_timer_function(struct timer_list *t) { struct loopback_pcm *dpcm = timer_container_of(dpcm, t, timer); unsigned long flags; spin_lock_irqsave(&dpcm->cable->lock, flags); if (loopback_jiffies_timer_pos_update(dpcm->cable) & (1 << dpcm->substream->stream)) { loopback_jiffies_timer_start(dpcm); if (dpcm->period_update_pending) { dpcm->period_update_pending = 0; spin_unlock_irqrestore(&dpcm->cable->lock, flags); /* need to unlock before calling below */ snd_pcm_period_elapsed(dpcm->substream); return; } } spin_unlock_irqrestore(&dpcm->cable->lock, flags); } /* call in cable->lock */ static int loopback_snd_timer_check_resolution(struct snd_pcm_runtime *runtime, unsigned long resolution) { if (resolution != runtime->timer_resolution) { struct loopback_pcm *dpcm = runtime->private_data; struct loopback_cable *cable = dpcm->cable; /* Worst case estimation of possible values for resolution * resolution <= (512 * 1024) frames / 8kHz in nsec * resolution <= 65.536.000.000 nsec * * period_size <= 65.536.000.000 nsec / 1000nsec/usec * 192kHz + * 500.000 
/*
 * Advance a cable by one period in response to a sound timer event.
 *
 * @cable:      cable whose streams are driven by the sound timer
 * @event:      SNDRV_TIMER_EVENT_TICK or SNDRV_TIMER_EVENT_MSTOP
 * @resolution: timer resolution; only checked for SNDRV_TIMER_EVENT_TICK
 *
 * All position handling is done under cable->lock.  The lock is released
 * before snd_pcm_stop_xrun() or snd_pcm_period_elapsed() is called on the
 * substreams.
 *
 * For SNDRV_TIMER_EVENT_MSTOP nothing is done unless a playback stream
 * exists and its runtime state is SNDRV_PCM_STATE_DRAINING.
 */
static void loopback_snd_timer_period_elapsed(struct loopback_cable *cable,
					      int event,
					      unsigned long resolution)
{
	struct loopback_pcm *dpcm_play, *dpcm_capt;
	struct snd_pcm_substream *substream_play, *substream_capt;
	struct snd_pcm_runtime *valid_runtime;
	unsigned int running, elapsed_bytes;
	unsigned long flags;

	spin_lock_irqsave(&cable->lock, flags);
	/* bitmask of streams that are running and not paused */
	running = cable->running ^ cable->pause;
	/* no need to do anything if no stream is running */
	if (!running) {
		spin_unlock_irqrestore(&cable->lock, flags);
		return;
	}

	dpcm_play = cable->streams[SNDRV_PCM_STREAM_PLAYBACK];
	dpcm_capt = cable->streams[SNDRV_PCM_STREAM_CAPTURE];

	if (event == SNDRV_TIMER_EVENT_MSTOP) {
		if (!dpcm_play ||
		    dpcm_play->substream->runtime->state !=
				SNDRV_PCM_STATE_DRAINING) {
			spin_unlock_irqrestore(&cable->lock, flags);
			return;
		}
	}

	/* a substream pointer is kept only for a stream that is running */
	substream_play = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ?
			dpcm_play->substream : NULL;
	substream_capt = (running & (1 << SNDRV_PCM_STREAM_CAPTURE)) ?
			dpcm_capt->substream : NULL;
	/* prefer the playback runtime, fall back to the capture runtime */
	valid_runtime = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ?
				dpcm_play->substream->runtime :
				dpcm_capt->substream->runtime;

	/* resolution is only valid for SNDRV_TIMER_EVENT_TICK events */
	if (event == SNDRV_TIMER_EVENT_TICK) {
		/* The hardware rules guarantee that playback and capture period
		 * are the same. Therefore only one device has to be checked
		 * here.
		 */
		if (loopback_snd_timer_check_resolution(valid_runtime,
							resolution) < 0) {
			/* drop the lock first, then stop the running streams */
			spin_unlock_irqrestore(&cable->lock, flags);
			if (substream_play)
				snd_pcm_stop_xrun(substream_play);
			if (substream_capt)
				snd_pcm_stop_xrun(substream_capt);
			return;
		}
	}

	/* each event moves exactly one period worth of bytes */
	elapsed_bytes = frames_to_bytes(valid_runtime,
					valid_runtime->period_size);
	/* The same timer interrupt is used for playback and capture device */
	if ((running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) &&
	    (running & (1 << SNDRV_PCM_STREAM_CAPTURE))) {
		copy_play_buf(dpcm_play, dpcm_capt, elapsed_bytes);
		bytepos_finish(dpcm_play, elapsed_bytes);
		bytepos_finish(dpcm_capt, elapsed_bytes);
	} else if (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) {
		bytepos_finish(dpcm_play, elapsed_bytes);
	} else if (running & (1 << SNDRV_PCM_STREAM_CAPTURE)) {
		clear_capture_buf(dpcm_capt, elapsed_bytes);
		bytepos_finish(dpcm_capt, elapsed_bytes);
	}
	spin_unlock_irqrestore(&cable->lock, flags);

	/* period notifications are issued with cable->lock released */
	if (substream_play)
		snd_pcm_period_elapsed(substream_play);
	if (substream_capt)
		snd_pcm_period_elapsed(substream_capt);
}
* loopback_trigger() * spin_lock(&cable->lock) * loopback_snd_timer_start() * snd_timer_start() * spin_lock(&timer->lock) * Therefore when using the oposit order of locks here it could result * in a deadlock. */ if (event == SNDRV_TIMER_EVENT_MSTOP) { struct loopback_cable *cable = timeri->callback_data; /* sound card of the timer was stopped. Therefore there will not * be any further timer callbacks. Due to this forward audio * data from here if in draining state. When still in running * state the streaming will be aborted by the usual timeout. It * should not be aborted here because may be the timer sound * card does only a recovery and the timer is back soon. * This work triggers loopback_snd_timer_work() */ schedule_work(&cable->snd_timer.event_work); } } static void loopback_jiffies_timer_dpcm_info(struct loopback_pcm *dpcm, struct snd_info_buffer *buffer) { snd_iprintf(buffer, " update_pending:\t%u\n", dpcm->period_update_pending); snd_iprintf(buffer, " irq_pos:\t\t%u\n", dpcm->irq_pos); snd_iprintf(buffer, " period_frac:\t%u\n", dpcm->period_size_frac); snd_iprintf(buffer, " last_jiffies:\t%lu (%lu)\n", dpcm->last_jiffies, jiffies); snd_iprintf(buffer, " timer_expires:\t%lu\n", dpcm->timer.expires); } static void loopback_snd_timer_dpcm_info(struct loopback_pcm *dpcm, struct snd_info_buffer *buffer) { struct loopback_cable *cable = dpcm->cable; snd_iprintf(buffer, " sound timer:\thw:%d,%d,%d\n", cable->snd_timer.id.card, cable->snd_timer.id.device, cable->snd_timer.id.subdevice); snd_iprintf(buffer, " timer open:\t\t%s\n", snd_pcm_direction_name(cable->snd_timer.stream)); } static snd_pcm_uframes_t loopback_pointer(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback_pcm *dpcm = runtime->private_data; snd_pcm_uframes_t pos; spin_lock(&dpcm->cable->lock); if (dpcm->cable->ops->pos_update) dpcm->cable->ops->pos_update(dpcm->cable); pos = dpcm->buf_pos; spin_unlock(&dpcm->cable->lock); return 
bytes_to_frames(runtime, pos); } static const struct snd_pcm_hardware loopback_pcm_hardware = { .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME | SNDRV_PCM_INFO_NONINTERLEAVED), .formats = (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE | SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE | SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_3BE | SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_S32_BE | SNDRV_PCM_FMTBIT_FLOAT_LE | SNDRV_PCM_FMTBIT_FLOAT_BE | SNDRV_PCM_FMTBIT_DSD_U8 | SNDRV_PCM_FMTBIT_DSD_U16_LE | SNDRV_PCM_FMTBIT_DSD_U16_BE | SNDRV_PCM_FMTBIT_DSD_U32_LE | SNDRV_PCM_FMTBIT_DSD_U32_BE), .rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_768000, .rate_min = 8000, .rate_max = 768000, .channels_min = 1, .channels_max = 32, .buffer_bytes_max = 2 * 1024 * 1024, .period_bytes_min = 64, /* note check overflow in frac_pos() using pcm_rate_shift before changing period_bytes_max value */ .period_bytes_max = 1024 * 1024, .periods_min = 1, .periods_max = 1024, .fifo_size = 0, }; static void loopback_runtime_free(struct snd_pcm_runtime *runtime) { struct loopback_pcm *dpcm = runtime->private_data; kfree(dpcm); } static int loopback_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback_pcm *dpcm = runtime->private_data; struct loopback_cable *cable = dpcm->cable; mutex_lock(&dpcm->loopback->cable_lock); cable->valid &= ~(1 << substream->stream); mutex_unlock(&dpcm->loopback->cable_lock); return 0; } static unsigned int get_cable_index(struct snd_pcm_substream *substream) { if (!substream->pcm->device) return substream->stream; else return !substream->stream; } static int rule_format(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct loopback_pcm *dpcm = rule->private; struct loopback_cable *cable = dpcm->cable; struct snd_mask m; snd_mask_none(&m); mutex_lock(&dpcm->loopback->cable_lock); m.bits[0] 
= (u_int32_t)cable->hw.formats; m.bits[1] = (u_int32_t)(cable->hw.formats >> 32); mutex_unlock(&dpcm->loopback->cable_lock); return snd_mask_refine(hw_param_mask(params, rule->var), &m); } static int rule_rate(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct loopback_pcm *dpcm = rule->private; struct loopback_cable *cable = dpcm->cable; struct snd_interval t; mutex_lock(&dpcm->loopback->cable_lock); t.min = cable->hw.rate_min; t.max = cable->hw.rate_max; mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int rule_channels(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct loopback_pcm *dpcm = rule->private; struct loopback_cable *cable = dpcm->cable; struct snd_interval t; mutex_lock(&dpcm->loopback->cable_lock); t.min = cable->hw.channels_min; t.max = cable->hw.channels_max; mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static int rule_period_bytes(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { struct loopback_pcm *dpcm = rule->private; struct loopback_cable *cable = dpcm->cable; struct snd_interval t; mutex_lock(&dpcm->loopback->cable_lock); t.min = cable->hw.period_bytes_min; t.max = cable->hw.period_bytes_max; mutex_unlock(&dpcm->loopback->cable_lock); t.openmin = 0; t.openmax = 0; t.integer = 0; return snd_interval_refine(hw_param_interval(params, rule->var), &t); } static void free_cable(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; int dev = get_cable_index(substream); struct loopback_cable *cable; cable = loopback->cables[substream->number][dev]; if (!cable) return; if (cable->streams[!substream->stream]) { /* other stream is still alive */ spin_lock_irq(&cable->lock); cable->streams[substream->stream] = NULL; 
spin_unlock_irq(&cable->lock); } else { struct loopback_pcm *dpcm = substream->runtime->private_data; if (cable->ops && cable->ops->close_cable && dpcm) cable->ops->close_cable(dpcm); /* free the cable */ loopback->cables[substream->number][dev] = NULL; kfree(cable); } } static int loopback_jiffies_timer_open(struct loopback_pcm *dpcm) { timer_setup(&dpcm->timer, loopback_jiffies_timer_function, 0); return 0; } static const struct loopback_ops loopback_jiffies_timer_ops = { .open = loopback_jiffies_timer_open, .start = loopback_jiffies_timer_start, .stop = loopback_jiffies_timer_stop, .stop_sync = loopback_jiffies_timer_stop_sync, .close_substream = loopback_jiffies_timer_stop_sync, .pos_update = loopback_jiffies_timer_pos_update, .dpcm_info = loopback_jiffies_timer_dpcm_info, }; static int loopback_parse_timer_id(const char *str, struct snd_timer_id *tid) { /* [<pref>:](<card name>|<card idx>)[{.,}<dev idx>[{.,}<subdev idx>]] */ const char * const sep_dev = ".,"; const char * const sep_pref = ":"; const char *name = str; char *sep, save = '\0'; int card_idx = 0, dev = 0, subdev = 0; int err; sep = strpbrk(str, sep_pref); if (sep) name = sep + 1; sep = strpbrk(name, sep_dev); if (sep) { save = *sep; *sep = '\0'; } err = kstrtoint(name, 0, &card_idx); if (err == -EINVAL) { /* Must be the name, not number */ for (card_idx = 0; card_idx < snd_ecards_limit; card_idx++) { struct snd_card *card = snd_card_ref(card_idx); if (card) { if (!strcmp(card->id, name)) err = 0; snd_card_unref(card); } if (!err) break; } } if (sep) { *sep = save; if (!err) { char *sep2, save2 = '\0'; sep2 = strpbrk(sep + 1, sep_dev); if (sep2) { save2 = *sep2; *sep2 = '\0'; } err = kstrtoint(sep + 1, 0, &dev); if (sep2) { *sep2 = save2; if (!err) err = kstrtoint(sep2 + 1, 0, &subdev); } } } if (card_idx == -1) tid->dev_class = SNDRV_TIMER_CLASS_GLOBAL; if (!err && tid) { tid->card = card_idx; tid->device = dev; tid->subdevice = subdev; } return err; } /* call in loopback->cable_lock */ static int 
loopback_snd_timer_open(struct loopback_pcm *dpcm) { int err = 0; struct snd_timer_id tid = { .dev_class = SNDRV_TIMER_CLASS_PCM, .dev_sclass = SNDRV_TIMER_SCLASS_APPLICATION, }; struct snd_timer_instance *timeri; struct loopback_cable *cable = dpcm->cable; /* check if timer was already opened. It is only opened once * per playback and capture subdevice (aka cable). */ if (cable->snd_timer.instance) goto exit; err = loopback_parse_timer_id(dpcm->loopback->timer_source, &tid); if (err < 0) { pcm_err(dpcm->substream->pcm, "Parsing timer source \'%s\' failed with %d", dpcm->loopback->timer_source, err); goto exit; } cable->snd_timer.stream = dpcm->substream->stream; cable->snd_timer.id = tid; timeri = snd_timer_instance_new(dpcm->loopback->card->id); if (!timeri) { err = -ENOMEM; goto exit; } /* The callback has to be called from another work. If * SNDRV_TIMER_IFLG_FAST is specified it will be called from the * snd_pcm_period_elapsed() call of the selected sound card. * snd_pcm_period_elapsed() helds snd_pcm_stream_lock_irqsave(). * Due to our callback loopback_snd_timer_function() also calls * snd_pcm_period_elapsed() which calls snd_pcm_stream_lock_irqsave(). * This would end up in a dead lock. */ timeri->flags |= SNDRV_TIMER_IFLG_AUTO; timeri->callback = loopback_snd_timer_function; timeri->callback_data = (void *)cable; timeri->ccallback = loopback_snd_timer_event; /* initialise a work used for draining */ INIT_WORK(&cable->snd_timer.event_work, loopback_snd_timer_work); /* The mutex loopback->cable_lock is kept locked. * Therefore snd_timer_open() cannot be called a second time * by the other device of the same cable. 
* Therefore the following issue cannot happen: * [proc1] Call loopback_timer_open() -> * Unlock cable->lock for snd_timer_close/open() call * [proc2] Call loopback_timer_open() -> snd_timer_open(), * snd_timer_start() * [proc1] Call snd_timer_open() and overwrite running timer * instance */ err = snd_timer_open(timeri, &cable->snd_timer.id, current->pid); if (err < 0) { pcm_err(dpcm->substream->pcm, "snd_timer_open (%d,%d,%d) failed with %d", cable->snd_timer.id.card, cable->snd_timer.id.device, cable->snd_timer.id.subdevice, err); snd_timer_instance_free(timeri); goto exit; } cable->snd_timer.instance = timeri; exit: return err; } /* stop_sync() is not required for sound timer because it does not need to be * restarted in loopback_prepare() on Xrun recovery */ static const struct loopback_ops loopback_snd_timer_ops = { .open = loopback_snd_timer_open, .start = loopback_snd_timer_start, .stop = loopback_snd_timer_stop, .close_cable = loopback_snd_timer_close_cable, .dpcm_info = loopback_snd_timer_dpcm_info, }; static int loopback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm; struct loopback_cable *cable = NULL; int err = 0; int dev = get_cable_index(substream); mutex_lock(&loopback->cable_lock); dpcm = kzalloc(sizeof(*dpcm), GFP_KERNEL); if (!dpcm) { err = -ENOMEM; goto unlock; } dpcm->loopback = loopback; dpcm->substream = substream; cable = loopback->cables[substream->number][dev]; if (!cable) { cable = kzalloc(sizeof(*cable), GFP_KERNEL); if (!cable) { err = -ENOMEM; goto unlock; } spin_lock_init(&cable->lock); cable->hw = loopback_pcm_hardware; if (loopback->timer_source) cable->ops = &loopback_snd_timer_ops; else cable->ops = &loopback_jiffies_timer_ops; loopback->cables[substream->number][dev] = cable; } dpcm->cable = cable; runtime->private_data = dpcm; if (cable->ops->open) { err = cable->ops->open(dpcm); if (err < 0) goto 
unlock; } snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); /* use dynamic rules based on actual runtime->hw values */ /* note that the default rules created in the PCM midlevel code */ /* are cached -> they do not reflect the actual state */ err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, rule_format, dpcm, SNDRV_PCM_HW_PARAM_FORMAT, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, rule_rate, dpcm, SNDRV_PCM_HW_PARAM_RATE, -1); if (err < 0) goto unlock; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, rule_channels, dpcm, SNDRV_PCM_HW_PARAM_CHANNELS, -1); if (err < 0) goto unlock; /* In case of sound timer the period time of both devices of the same * loop has to be the same. * This rule only takes effect if a sound timer was chosen */ if (cable->snd_timer.instance) { err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, rule_period_bytes, dpcm, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, -1); if (err < 0) goto unlock; } /* loopback_runtime_free() has not to be called if kfree(dpcm) was * already called here. Otherwise it will end up with a double free. 
*/ runtime->private_free = loopback_runtime_free; if (get_notify(dpcm)) runtime->hw = loopback_pcm_hardware; else runtime->hw = cable->hw; spin_lock_irq(&cable->lock); cable->streams[substream->stream] = dpcm; spin_unlock_irq(&cable->lock); unlock: if (err < 0) { free_cable(substream); kfree(dpcm); } mutex_unlock(&loopback->cable_lock); return err; } static int loopback_close(struct snd_pcm_substream *substream) { struct loopback *loopback = substream->private_data; struct loopback_pcm *dpcm = substream->runtime->private_data; int err = 0; if (dpcm->cable->ops->close_substream) err = dpcm->cable->ops->close_substream(dpcm); mutex_lock(&loopback->cable_lock); free_cable(substream); mutex_unlock(&loopback->cable_lock); return err; } static const struct snd_pcm_ops loopback_pcm_ops = { .open = loopback_open, .close = loopback_close, .hw_free = loopback_hw_free, .prepare = loopback_prepare, .trigger = loopback_trigger, .pointer = loopback_pointer, }; static int loopback_pcm_new(struct loopback *loopback, int device, int substreams) { struct snd_pcm *pcm; int err; err = snd_pcm_new(loopback->card, "Loopback PCM", device, substreams, substreams, &pcm); if (err < 0) return err; snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &loopback_pcm_ops); snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &loopback_pcm_ops); snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC, NULL, 0, 0); pcm->private_data = loopback; pcm->info_flags = 0; strcpy(pcm->name, "Loopback PCM"); loopback->pcm[device] = pcm; return 0; } static int loopback_rate_shift_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 80000; uinfo->value.integer.max = 120000; uinfo->value.integer.step = 1; return 0; } static int loopback_rate_shift_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); mutex_lock(&loopback->cable_lock); 
ucontrol->value.integer.value[0] = loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].rate_shift; mutex_unlock(&loopback->cable_lock); return 0; } static int loopback_rate_shift_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); unsigned int val; int change = 0; val = ucontrol->value.integer.value[0]; if (val < 80000) val = 80000; if (val > 120000) val = 120000; mutex_lock(&loopback->cable_lock); if (val != loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].rate_shift) { loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].rate_shift = val; change = 1; } mutex_unlock(&loopback->cable_lock); return change; } static int loopback_notify_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); mutex_lock(&loopback->cable_lock); ucontrol->value.integer.value[0] = loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].notify; mutex_unlock(&loopback->cable_lock); return 0; } static int loopback_notify_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); unsigned int val; int change = 0; val = ucontrol->value.integer.value[0] ? 
1 : 0; mutex_lock(&loopback->cable_lock); if (val != loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].notify) { loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].notify = val; change = 1; } mutex_unlock(&loopback->cable_lock); return change; } static int loopback_active_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); struct loopback_cable *cable; unsigned int val = 0; mutex_lock(&loopback->cable_lock); cable = loopback->cables[kcontrol->id.subdevice][kcontrol->id.device ^ 1]; if (cable != NULL) { unsigned int running = cable->running ^ cable->pause; val = (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) ? 1 : 0; } mutex_unlock(&loopback->cable_lock); ucontrol->value.integer.value[0] = val; return 0; } static int loopback_format_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = (__force int)SNDRV_PCM_FORMAT_LAST; uinfo->value.integer.step = 1; return 0; } static int loopback_format_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); ucontrol->value.integer.value[0] = (__force int)loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].format; return 0; } static int loopback_rate_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 0; uinfo->value.integer.max = 192000; uinfo->value.integer.step = 1; return 0; } static int loopback_rate_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); mutex_lock(&loopback->cable_lock); ucontrol->value.integer.value[0] = loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].rate; mutex_unlock(&loopback->cable_lock); 
return 0; } static int loopback_channels_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; uinfo->count = 1; uinfo->value.integer.min = 1; uinfo->value.integer.max = 1024; uinfo->value.integer.step = 1; return 0; } static int loopback_channels_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); mutex_lock(&loopback->cable_lock); ucontrol->value.integer.value[0] = loopback->setup[kcontrol->id.subdevice] [kcontrol->id.device].channels; mutex_unlock(&loopback->cable_lock); return 0; } static int loopback_access_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo) { const char * const texts[] = {"Interleaved", "Non-interleaved"}; return snd_ctl_enum_info(uinfo, 1, ARRAY_SIZE(texts), texts); } static int loopback_access_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct loopback *loopback = snd_kcontrol_chip(kcontrol); snd_pcm_access_t access; mutex_lock(&loopback->cable_lock); access = loopback->setup[kcontrol->id.subdevice][kcontrol->id.device].access; ucontrol->value.enumerated.item[0] = !is_access_interleaved(access); mutex_unlock(&loopback->cable_lock); return 0; } static const struct snd_kcontrol_new loopback_controls[] = { { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Rate Shift 100000", .info = loopback_rate_shift_info, .get = loopback_rate_shift_get, .put = loopback_rate_shift_put, }, { .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Notify", .info = snd_ctl_boolean_mono_info, .get = loopback_notify_get, .put = loopback_notify_put, }, #define ACTIVE_IDX 2 { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Slave Active", .info = snd_ctl_boolean_mono_info, .get = loopback_active_get, }, #define FORMAT_IDX 3 { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Slave Format", .info = loopback_format_info, 
.get = loopback_format_get }, #define RATE_IDX 4 { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Slave Rate", .info = loopback_rate_info, .get = loopback_rate_get }, #define CHANNELS_IDX 5 { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Slave Channels", .info = loopback_channels_info, .get = loopback_channels_get }, #define ACCESS_IDX 6 { .access = SNDRV_CTL_ELEM_ACCESS_READ, .iface = SNDRV_CTL_ELEM_IFACE_PCM, .name = "PCM Slave Access Mode", .info = loopback_access_info, .get = loopback_access_get, }, }; static int loopback_mixer_new(struct loopback *loopback, int notify) { struct snd_card *card = loopback->card; struct snd_pcm *pcm; struct snd_kcontrol *kctl; struct loopback_setup *setup; int err, dev, substr, substr_count, idx; strcpy(card->mixername, "Loopback Mixer"); for (dev = 0; dev < 2; dev++) { pcm = loopback->pcm[dev]; substr_count = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream_count; for (substr = 0; substr < substr_count; substr++) { setup = &loopback->setup[substr][dev]; setup->notify = notify; setup->rate_shift = NO_PITCH; setup->format = SNDRV_PCM_FORMAT_S16_LE; setup->access = SNDRV_PCM_ACCESS_RW_INTERLEAVED; setup->rate = 48000; setup->channels = 2; for (idx = 0; idx < ARRAY_SIZE(loopback_controls); idx++) { kctl = snd_ctl_new1(&loopback_controls[idx], loopback); if (!kctl) return -ENOMEM; kctl->id.device = dev; kctl->id.subdevice = substr; /* Add the control before copying the id so that * the numid field of the id is set in the copy. 
*/ err = snd_ctl_add(card, kctl); if (err < 0) return err; switch (idx) { case ACTIVE_IDX: setup->active_id = kctl->id; break; case FORMAT_IDX: setup->format_id = kctl->id; break; case RATE_IDX: setup->rate_id = kctl->id; break; case CHANNELS_IDX: setup->channels_id = kctl->id; break; case ACCESS_IDX: setup->access_id = kctl->id; break; default: break; } } } } return 0; } static void print_dpcm_info(struct snd_info_buffer *buffer, struct loopback_pcm *dpcm, const char *id) { snd_iprintf(buffer, " %s\n", id); if (dpcm == NULL) { snd_iprintf(buffer, " inactive\n"); return; } snd_iprintf(buffer, " buffer_size:\t%u\n", dpcm->pcm_buffer_size); snd_iprintf(buffer, " buffer_pos:\t\t%u\n", dpcm->buf_pos); snd_iprintf(buffer, " silent_size:\t%u\n", dpcm->silent_size); snd_iprintf(buffer, " period_size:\t%u\n", dpcm->pcm_period_size); snd_iprintf(buffer, " bytes_per_sec:\t%u\n", dpcm->pcm_bps); snd_iprintf(buffer, " sample_align:\t%u\n", dpcm->pcm_salign); snd_iprintf(buffer, " rate_shift:\t\t%u\n", dpcm->pcm_rate_shift); if (dpcm->cable->ops->dpcm_info) dpcm->cable->ops->dpcm_info(dpcm, buffer); } static void print_substream_info(struct snd_info_buffer *buffer, struct loopback *loopback, int sub, int num) { struct loopback_cable *cable = loopback->cables[sub][num]; snd_iprintf(buffer, "Cable %i substream %i:\n", num, sub); if (cable == NULL) { snd_iprintf(buffer, " inactive\n"); return; } snd_iprintf(buffer, " valid: %u\n", cable->valid); snd_iprintf(buffer, " running: %u\n", cable->running); snd_iprintf(buffer, " pause: %u\n", cable->pause); print_dpcm_info(buffer, cable->streams[0], "Playback"); print_dpcm_info(buffer, cable->streams[1], "Capture"); } static void print_cable_info(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct loopback *loopback = entry->private_data; int sub, num; mutex_lock(&loopback->cable_lock); num = entry->name[strlen(entry->name)-1]; num = num == '0' ? 
0 : 1; for (sub = 0; sub < MAX_PCM_SUBSTREAMS; sub++) print_substream_info(buffer, loopback, sub, num); mutex_unlock(&loopback->cable_lock); } static int loopback_cable_proc_new(struct loopback *loopback, int cidx) { char name[32]; snprintf(name, sizeof(name), "cable#%d", cidx); return snd_card_ro_proc_new(loopback->card, name, loopback, print_cable_info); } static void loopback_set_timer_source(struct loopback *loopback, const char *value) { if (loopback->timer_source) { devm_kfree(loopback->card->dev, loopback->timer_source); loopback->timer_source = NULL; } if (value && *value) loopback->timer_source = devm_kstrdup(loopback->card->dev, value, GFP_KERNEL); } static void print_timer_source_info(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct loopback *loopback = entry->private_data; mutex_lock(&loopback->cable_lock); snd_iprintf(buffer, "%s\n", loopback->timer_source ? loopback->timer_source : ""); mutex_unlock(&loopback->cable_lock); } static void change_timer_source_info(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct loopback *loopback = entry->private_data; char line[64]; mutex_lock(&loopback->cable_lock); if (!snd_info_get_line(buffer, line, sizeof(line))) loopback_set_timer_source(loopback, strim(line)); mutex_unlock(&loopback->cable_lock); } static int loopback_timer_source_proc_new(struct loopback *loopback) { return snd_card_rw_proc_new(loopback->card, "timer_source", loopback, print_timer_source_info, change_timer_source_info); } static int loopback_probe(struct platform_device *devptr) { struct snd_card *card; struct loopback *loopback; int dev = devptr->id; int err; err = snd_devm_card_new(&devptr->dev, index[dev], id[dev], THIS_MODULE, sizeof(struct loopback), &card); if (err < 0) return err; loopback = card->private_data; if (pcm_substreams[dev] < 1) pcm_substreams[dev] = 1; if (pcm_substreams[dev] > MAX_PCM_SUBSTREAMS) pcm_substreams[dev] = MAX_PCM_SUBSTREAMS; loopback->card = card; 
loopback_set_timer_source(loopback, timer_source[dev]); mutex_init(&loopback->cable_lock); err = loopback_pcm_new(loopback, 0, pcm_substreams[dev]); if (err < 0) return err; err = loopback_pcm_new(loopback, 1, pcm_substreams[dev]); if (err < 0) return err; err = loopback_mixer_new(loopback, pcm_notify[dev] ? 1 : 0); if (err < 0) return err; loopback_cable_proc_new(loopback, 0); loopback_cable_proc_new(loopback, 1); loopback_timer_source_proc_new(loopback); strcpy(card->driver, "Loopback"); strcpy(card->shortname, "Loopback"); sprintf(card->longname, "Loopback %i", dev + 1); err = snd_card_register(card); if (err < 0) return err; platform_set_drvdata(devptr, card); return 0; } static int loopback_suspend(struct device *pdev) { struct snd_card *card = dev_get_drvdata(pdev); snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); return 0; } static int loopback_resume(struct device *pdev) { struct snd_card *card = dev_get_drvdata(pdev); snd_power_change_state(card, SNDRV_CTL_POWER_D0); return 0; } static DEFINE_SIMPLE_DEV_PM_OPS(loopback_pm, loopback_suspend, loopback_resume); #define SND_LOOPBACK_DRIVER "snd_aloop" static struct platform_driver loopback_driver = { .probe = loopback_probe, .driver = { .name = SND_LOOPBACK_DRIVER, .pm = &loopback_pm, }, }; static void loopback_unregister_all(void) { int i; for (i = 0; i < ARRAY_SIZE(devices); ++i) platform_device_unregister(devices[i]); platform_driver_unregister(&loopback_driver); } static int __init alsa_card_loopback_init(void) { int i, err, cards; err = platform_driver_register(&loopback_driver); if (err < 0) return err; cards = 0; for (i = 0; i < SNDRV_CARDS; i++) { struct platform_device *device; if (!enable[i]) continue; device = platform_device_register_simple(SND_LOOPBACK_DRIVER, i, NULL, 0); if (IS_ERR(device)) continue; if (!platform_get_drvdata(device)) { platform_device_unregister(device); continue; } devices[i] = device; cards++; } if (!cards) { #ifdef MODULE pr_err("aloop: No loopback enabled\n"); #endif 
loopback_unregister_all(); return -ENODEV; } return 0; } static void __exit alsa_card_loopback_exit(void) { loopback_unregister_all(); } module_init(alsa_card_loopback_init) module_exit(alsa_card_loopback_exit)
67 24 42 41 13 28 36 5 31 1 1 22 21 21 3 2 1 2 3 4 6 2 8 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #include <linux/bitmap.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/btf_ids.h> #define BLOOM_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) struct bpf_bloom_filter { struct bpf_map map; u32 bitset_mask; u32 hash_seed; u32 nr_hash_funcs; unsigned long bitset[]; }; static u32 hash(struct bpf_bloom_filter *bloom, void *value, u32 value_size, u32 index) { u32 h; if (likely(value_size % 4 == 0)) h = jhash2(value, value_size / 4, bloom->hash_seed + index); else h = jhash(value, value_size, bloom->hash_seed + index); return h & bloom->bitset_mask; } static long bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); u32 i, h; for (i = 0; i < bloom->nr_hash_funcs; i++) { h = hash(bloom, value, map->value_size, i); if (!test_bit(h, bloom->bitset)) return -ENOENT; } return 0; } static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct bpf_bloom_filter *bloom = 
container_of(map, struct bpf_bloom_filter, map); u32 i, h; if (flags != BPF_ANY) return -EINVAL; for (i = 0; i < bloom->nr_hash_funcs; i++) { h = hash(bloom, value, map->value_size, i); set_bit(h, bloom->bitset); } return 0; } static long bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } static long bloom_map_delete_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -EOPNOTSUPP; } /* Called from syscall */ static int bloom_map_alloc_check(union bpf_attr *attr) { if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. */ return -E2BIG; return 0; } static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; int numa_node = bpf_map_attr_numa_node(attr); struct bpf_bloom_filter *bloom; if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || /* The lower 4 bits of map_extra (0xF) specify the number * of hash functions */ (attr->map_extra & ~0xF)) return ERR_PTR(-EINVAL); nr_hash_funcs = attr->map_extra; if (nr_hash_funcs == 0) /* Default to using 5 hash functions if unspecified */ nr_hash_funcs = 5; /* For the bloom filter, the optimal bit array size that minimizes the * false positive probability is n * k / ln(2) where n is the number of * expected entries in the bloom filter and k is the number of hash * functions. We use 7 / 5 to approximate 1 / ln(2). * * We round this up to the nearest power of two to enable more efficient * hashing using bitmasks. The bitmask will be the bit array size - 1. * * If this overflows a u32, the bit array size will have 2^32 (4 * GB) bits. 
*/ if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || nr_bits > (1UL << 31)) { /* The bit array size is 2^32 bits but to avoid overflowing the * u32, we use U32_MAX, which will round up to the equivalent * number of bytes */ bitset_bytes = BITS_TO_BYTES(U32_MAX); bitset_mask = U32_MAX; } else { if (nr_bits <= BITS_PER_LONG) nr_bits = BITS_PER_LONG; else nr_bits = roundup_pow_of_two(nr_bits); bitset_bytes = BITS_TO_BYTES(nr_bits); bitset_mask = nr_bits - 1; } bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); if (!bloom) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&bloom->map, attr); bloom->nr_hash_funcs = nr_hash_funcs; bloom->bitset_mask = bitset_mask; if (!(attr->map_flags & BPF_F_ZERO_SEED)) bloom->hash_seed = get_random_u32(); return &bloom->map; } static void bloom_map_free(struct bpf_map *map) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); bpf_map_area_free(bloom); } static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) { /* The eBPF program should use map_peek_elem instead */ return ERR_PTR(-EINVAL); } static long bloom_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; } static int bloom_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { /* Bloom filter maps are keyless */ return btf_type_is_void(key_type) ? 
0 : -EINVAL; } static u64 bloom_map_mem_usage(const struct bpf_map *map) { struct bpf_bloom_filter *bloom; u64 bitset_bytes; bloom = container_of(map, struct bpf_bloom_filter, map); bitset_bytes = BITS_TO_BYTES((u64)bloom->bitset_mask + 1); bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); return sizeof(*bloom) + bitset_bytes; } BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, .map_push_elem = bloom_map_push_elem, .map_peek_elem = bloom_map_peek_elem, .map_pop_elem = bloom_map_pop_elem, .map_lookup_elem = bloom_map_lookup_elem, .map_update_elem = bloom_map_update_elem, .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, .map_mem_usage = bloom_map_mem_usage, .map_btf_id = &bpf_bloom_map_btf_ids[0], };
3 3 3 1 1 1 2 2 1 3 3 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
/*
 * NOTE(review): the run of numbers below is not C. It looks like
 * line-number gutter residue from the report this text was extracted
 * from - confirm and strip it at the source.
 */
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Software async crypto daemon.
 *
 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 *
 * Added AEAD support to cryptd.
 *    Authors: Tadeusz Struk (tadeusz.struk@intel.com)
 *             Adrian Hoban <adrian.hoban@intel.com>
 *             Gabriele Paoloni <gabriele.paoloni@intel.com>
 *             Aidan O'Mahony (aidan.o.mahony@intel.com)
 *    Copyright (c) 2010, Intel Corporation.
 */

#include <crypto/internal/hash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/cryptd.h>
#include <linux/refcount.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Per-CPU queue depth handed to cryptd_init_queue() at module init. */
static unsigned int cryptd_max_cpu_qlen = 1000;
module_param(cryptd_max_cpu_qlen, uint, 0);
MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth");

/* Workqueue on which every per-CPU cryptd work item runs. */
static struct workqueue_struct *cryptd_wq;

/* One request queue plus the work item that drains it. */
struct cryptd_cpu_queue {
	struct crypto_queue queue;
	struct work_struct work;
};

struct cryptd_queue {
	/*
	 * Protected by disabling BH to allow enqueueing from softinterrupt and
	 * dequeuing from kworker (cryptd_queue_worker()).
	 */
	struct cryptd_cpu_queue __percpu *cpu_queue;
};

/*
 * Instance contexts: a spawn for the wrapped algorithm followed by the
 * shared queue pointer.
 *
 * NOTE(review): cryptd_get_queue() reads every instance context through
 * struct cryptd_instance_ctx, so the typed variants below presumably
 * have to keep `queue` at the same offset - confirm.
 */
struct cryptd_instance_ctx {
	struct crypto_spawn spawn;
	struct cryptd_queue *queue;
};

struct skcipherd_instance_ctx {
	struct crypto_skcipher_spawn spawn;
	struct cryptd_queue *queue;
};

struct hashd_instance_ctx {
	struct crypto_shash_spawn spawn;
	struct cryptd_queue *queue;
};

struct aead_instance_ctx {
	struct crypto_aead_spawn aead_spawn;
	struct cryptd_queue *queue;
};

/*
 * Per-tfm contexts. refcnt must remain the first member of each:
 * cryptd_enqueue_request() uses the tfm context pointer itself as a
 * refcount_t pointer.
 */
struct cryptd_skcipher_ctx {
	refcount_t refcnt;
	struct crypto_skcipher *child;
};

/* Per-request context: the sub-request issued to the child skcipher. */
struct cryptd_skcipher_request_ctx {
	struct skcipher_request req;
};

struct cryptd_hash_ctx {
	refcount_t refcnt;
	struct crypto_shash *child;
};

/*
 * Per-request hash context: the caller's completion callback and data
 * (parked here while the request is queued) and the child's descriptor.
 */
struct cryptd_hash_request_ctx {
	crypto_completion_t complete;
	void *data;
	struct shash_desc desc;
};

struct cryptd_aead_ctx {
	refcount_t refcnt;
	struct crypto_aead *child;
};

/* Per-request context: the sub-request issued to the child AEAD. */
struct cryptd_aead_request_ctx {
	struct aead_request req;
};

static void cryptd_queue_worker(struct work_struct *work);

/*
 * Allocate the per-CPU queues and initialise, for every possible CPU, a
 * crypto queue of depth @max_cpu_qlen and its worker.
 * Returns 0, or -ENOMEM if the per-CPU allocation fails.
 */
static int cryptd_init_queue(struct cryptd_queue *queue,
			     unsigned int max_cpu_qlen)
{
	int cpu;
	struct cryptd_cpu_queue *cpu_queue;

	queue->cpu_queue = alloc_percpu(struct cryptd_cpu_queue);
	if (!queue->cpu_queue)
		return -ENOMEM;
	for_each_possible_cpu(cpu) {
		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
		INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
	}
	pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
	return 0;
}

/*
 * Free the per-CPU queues. Every queue must already be empty; a
 * non-zero qlen on any possible CPU triggers BUG_ON().
 */
static void cryptd_fini_queue(struct cryptd_queue *queue)
{
	int cpu;
	struct cryptd_cpu_queue *cpu_queue;

	for_each_possible_cpu(cpu) {
		cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
		BUG_ON(cpu_queue->queue.qlen);
	}
	free_percpu(queue->cpu_queue);
}

/*
 * Queue @request on the current CPU's queue and kick that CPU's worker.
 * Runs with BH disabled, matching the protection rule documented on
 * struct cryptd_queue. Returns whatever crypto_enqueue_request()
 * returned; on -ENOSPC nothing is scheduled and no reference is taken.
 */
static int cryptd_enqueue_request(struct cryptd_queue *queue,
				  struct crypto_async_request *request)
{
	int err;
	struct cryptd_cpu_queue *cpu_queue;
	refcount_t *refcnt;

	local_bh_disable();
	cpu_queue = this_cpu_ptr(queue->cpu_queue);
	err = crypto_enqueue_request(&cpu_queue->queue, request);

	/* The tfm context starts with its refcount (see the ctx structs). */
	refcnt = crypto_tfm_ctx(request->tfm);

	if (err == -ENOSPC)
		goto out;

	queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work);

	/*
	 * A zero count is left alone. Presumably that marks a tfm not
	 * obtained through cryptd_alloc_*(), the only place in this file
	 * that sets the count to 1 - confirm.
	 */
	if (!refcount_read(refcnt))
		goto out;

	/* Hold the tfm while the request is in flight. */
	refcount_inc(refcnt);

out:
	local_bh_enable();

	return err;
}

/* Called in workqueue context, do one real cryption work (via
 * req->complete) and reschedule itself if there are more work to
 * do. */
static void cryptd_queue_worker(struct work_struct *work)
{
	struct cryptd_cpu_queue *cpu_queue;
	struct crypto_async_request *req, *backlog;

	cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
	/*
	 * Only handle one request at a time to avoid hogging crypto workqueue.
	 */
	local_bh_disable();
	backlog = crypto_get_backlog(&cpu_queue->queue);
	req = crypto_dequeue_request(&cpu_queue->queue);
	local_bh_enable();

	if (!req)
		return;

	/* Signal the backlogged request, if any, with -EINPROGRESS. */
	if (backlog)
		crypto_request_complete(backlog, -EINPROGRESS);
	/* Invokes the per-operation handler installed at enqueue time. */
	crypto_request_complete(req, 0);

	if (cpu_queue->queue.qlen)
		queue_work(cryptd_wq, &cpu_queue->work);
}

/* Fetch the queue pointer stored in the instance context behind @tfm. */
static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
{
	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
	struct cryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
	return ictx->queue;
}

/*
 * Derive the type/mask pair used to look up the algorithm to wrap from
 * the template attributes in @algt.
 */
static void cryptd_type_and_mask(struct crypto_attr_type *algt,
				 u32 *type, u32 *mask)
{
	/*
	 * cryptd is allowed to wrap internal algorithms, but in that case the
	 * resulting cryptd instance will be marked as internal as well.
	 */
	*type = algt->type & CRYPTO_ALG_INTERNAL;
	*mask = algt->mask & CRYPTO_ALG_INTERNAL;

	/* No point in cryptd wrapping an algorithm that's already async. */
	*mask |= CRYPTO_ALG_ASYNC;

	*mask |= crypto_algt_inherited_mask(algt);
}

/*
 * Fill in the fields shared by all cryptd instances from the wrapped
 * @alg: driver name "cryptd(<driver name>)", the same cra_name,
 * priority raised by 50, and the same block size and alignment mask.
 * Returns -ENAMETOOLONG if the driver name does not fit.
 */
static int cryptd_init_instance(struct crypto_instance *inst,
				struct crypto_alg *alg)
{
	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
		     "cryptd(%s)",
		     alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);

	inst->alg.cra_priority = alg->cra_priority + 50;
	inst->alg.cra_blocksize = alg->cra_blocksize;
	inst->alg.cra_alignmask = alg->cra_alignmask;

	return 0;
}

/*
 * setkey for the cryptd skcipher: mirror the parent's request flags
 * onto the child, then set the key on the child.
 */
static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
				  const u8 *key, unsigned int keylen)
{
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
	struct crypto_skcipher *child = ctx->child;

	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_skcipher_set_flags(child,
				  crypto_skcipher_get_flags(parent) &
				  CRYPTO_TFM_REQ_MASK);
	return crypto_skcipher_setkey(child, key, keylen);
}

/*
 * First half of a worker invocation: put the caller's completion
 * callback/data (parked in the sub-request by the enqueue path) back on
 * @req, then build the sub-request for the child.
 * Returns NULL when @err is -EINPROGRESS, i.e. there is nothing to run.
 */
static struct skcipher_request *cryptd_skcipher_prepare(
	struct skcipher_request *req, int err)
{
	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
	struct skcipher_request *subreq = &rctx->req;
	struct cryptd_skcipher_ctx *ctx;
	struct crypto_skcipher *child;

	req->base.complete = subreq->base.complete;
	req->base.data = subreq->base.data;

	if (unlikely(err == -EINPROGRESS))
		return NULL;

	ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	child = ctx->child;

	/* No callback on the sub-request; its return value is used directly. */
	skcipher_request_set_tfm(subreq, child);
	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
				      NULL, NULL);

	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
				   req->iv);

	return subreq;
}

/*
 * Second half of a worker invocation: report @err to the caller, then
 * either re-arm the request (for -EINPROGRESS) or drop the reference
 * taken at enqueue time.
 */
static void cryptd_skcipher_complete(struct skcipher_request *req, int err,
				     crypto_completion_t complete)
{
	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_request *subreq = &rctx->req;
	/* Sampled before the caller's completion callback runs. */
	int refcnt = refcount_read(&ctx->refcnt);

	local_bh_disable();
	skcipher_request_complete(req, err);
	local_bh_enable();

	if (unlikely(err == -EINPROGRESS)) {
		/*
		 * Park the caller's callback again and reinstall the worker
		 * entry point so a later invocation does the real work.
		 */
		subreq->base.complete = req->base.complete;
		subreq->base.data = req->base.data;
		req->base.complete = complete;
		req->base.data = req;
	} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
		crypto_free_skcipher(tfm);
}

/* Worker entry point for encryption; @data is the skcipher_request. */
static void cryptd_skcipher_encrypt(void *data, int err)
{
	struct skcipher_request *req = data;
	struct skcipher_request *subreq;

	subreq = cryptd_skcipher_prepare(req, err);
	if (likely(subreq))
		err = crypto_skcipher_encrypt(subreq);

	cryptd_skcipher_complete(req, err, cryptd_skcipher_encrypt);
}

/* Worker entry point for decryption; @data is the skcipher_request. */
static void cryptd_skcipher_decrypt(void *data, int err)
{
	struct skcipher_request *req = data;
	struct skcipher_request *subreq;

	subreq = cryptd_skcipher_prepare(req, err);
	if (likely(subreq))
		err = crypto_skcipher_decrypt(subreq);

	cryptd_skcipher_complete(req, err, cryptd_skcipher_decrypt);
}

/*
 * Park the caller's completion callback/data in the sub-request,
 * install @compl as the request's callback (with the request itself as
 * data) and queue the request.
 */
static int cryptd_skcipher_enqueue(struct skcipher_request *req,
				   crypto_completion_t compl)
{
	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct skcipher_request *subreq = &rctx->req;
	struct cryptd_queue *queue;

	queue = cryptd_get_queue(crypto_skcipher_tfm(tfm));
	subreq->base.complete = req->base.complete;
	subreq->base.data = req->base.data;
	req->base.complete = compl;
	req->base.data = req;

	return cryptd_enqueue_request(queue, &req->base);
}

/* .encrypt hook: queue the request for cryptd_skcipher_encrypt(). */
static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req)
{
	return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt);
}

/* .decrypt hook: queue the request for cryptd_skcipher_decrypt(). */
static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req)
{
	return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt);
}

/*
 * tfm init: instantiate the child skcipher from the instance's spawn
 * and size the request context to hold our wrapper plus the child's
 * own request context.
 */
static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
{
	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
	struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst);
	struct crypto_skcipher_spawn *spawn = &ictx->spawn;
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct crypto_skcipher *cipher;

	cipher = crypto_spawn_skcipher(spawn);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	ctx->child = cipher;
	crypto_skcipher_set_reqsize(
		tfm, sizeof(struct cryptd_skcipher_request_ctx) +
		     crypto_skcipher_reqsize(cipher));
	return 0;
}

/* tfm exit: release the child skcipher. */
static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
{
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);

	crypto_free_skcipher(ctx->child);
}

/* Instance destructor: drop the spawn, then free the instance. */
static void cryptd_skcipher_free(struct skcipher_instance *inst)
{
	struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);

	crypto_drop_skcipher(&ctx->spawn);
	kfree(inst);
}

/*
 * Build and register a cryptd instance wrapping the skcipher named in
 * tb[1]. Returns 0 or a negative errno; on any failure after the
 * allocation the instance is released through cryptd_skcipher_free().
 */
static int cryptd_create_skcipher(struct crypto_template *tmpl,
				  struct rtattr **tb,
				  struct crypto_attr_type *algt,
				  struct cryptd_queue *queue)
{
	struct skcipherd_instance_ctx *ctx;
	struct skcipher_instance *inst;
	struct skcipher_alg_common *alg;
	u32 type;
	u32 mask;
	int err;

	cryptd_type_and_mask(algt, &type, &mask);

	/* Instance and its context are carved out of one allocation. */
	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	ctx = skcipher_instance_ctx(inst);
	ctx->queue = queue;

	err = crypto_grab_skcipher(&ctx->spawn, skcipher_crypto_instance(inst),
				   crypto_attr_alg_name(tb[1]), type, mask);
	if (err)
		goto err_free_inst;

	alg = crypto_spawn_skcipher_alg_common(&ctx->spawn);
	err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base);
	if (err)
		goto err_free_inst;

	/* Always async; internal only if the wrapped algorithm is. */
	inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC |
		(alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
	inst->alg.ivsize = alg->ivsize;
	inst->alg.chunksize = alg->chunksize;
	inst->alg.min_keysize = alg->min_keysize;
	inst->alg.max_keysize = alg->max_keysize;

	inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx);

	inst->alg.init = cryptd_skcipher_init_tfm;
	inst->alg.exit = cryptd_skcipher_exit_tfm;

	inst->alg.setkey = cryptd_skcipher_setkey;
	inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue;
	inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue;

	inst->free = cryptd_skcipher_free;

	err = skcipher_register_instance(tmpl, inst);
	if (err) {
		/* Shared cleanup: the earlier failure paths jump in here. */
err_free_inst:
		cryptd_skcipher_free(inst);
	}
	return err;
}

/*
 * tfm init: instantiate the child shash and size the request context to
 * hold our wrapper plus the child's descriptor state.
 */
static int cryptd_hash_init_tfm(struct crypto_ahash *tfm)
{
	struct ahash_instance *inst = ahash_alg_instance(tfm);
	struct hashd_instance_ctx *ictx = ahash_instance_ctx(inst);
	struct crypto_shash_spawn *spawn = &ictx->spawn;
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_shash *hash;

	hash = crypto_spawn_shash(spawn);
	if (IS_ERR(hash))
		return PTR_ERR(hash);

	ctx->child = hash;
	crypto_ahash_set_reqsize(tfm,
				 sizeof(struct cryptd_hash_request_ctx) +
				 crypto_shash_descsize(hash));
	return 0;
}

/* tfm clone: give the new tfm its own clone of the child shash. */
static int cryptd_hash_clone_tfm(struct crypto_ahash *ntfm,
				 struct crypto_ahash *tfm)
{
	struct cryptd_hash_ctx *nctx = crypto_ahash_ctx(ntfm);
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_shash *hash;

	hash = crypto_clone_shash(ctx->child);
	if (IS_ERR(hash))
		return PTR_ERR(hash);

	nctx->child = hash;
	return 0;
}

/* tfm exit: release the child shash. */
static void cryptd_hash_exit_tfm(struct crypto_ahash *tfm)
{
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);

	crypto_free_shash(ctx->child);
}

/*
 * setkey for the cryptd ahash: mirror the parent's request flags onto
 * the child, then set the key on the child.
 */
static int cryptd_hash_setkey(struct crypto_ahash *parent,
				   const u8 *key, unsigned int keylen)
{
	struct cryptd_hash_ctx *ctx   = crypto_ahash_ctx(parent);
	struct crypto_shash *child = ctx->child;

	crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) &
				      CRYPTO_TFM_REQ_MASK);
	return crypto_shash_setkey(child, key, keylen);
}

/*
 * Park the caller's completion callback/data in the request context,
 * install @compl as the request's callback (with the request itself as
 * data) and queue the request.
 */
static int cryptd_hash_enqueue(struct ahash_request *req,
				crypto_completion_t compl)
{
	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct cryptd_queue *queue =
		cryptd_get_queue(crypto_ahash_tfm(tfm));

	rctx->complete = req->base.complete;
	rctx->data = req->base.data;
	req->base.complete = compl;
	req->base.data = req;

	return cryptd_enqueue_request(queue, &req->base);
}

/*
 * First half of a hash worker: restore the caller's completion
 * callback/data onto @req and hand back the child descriptor.
 * Returns NULL when @err is -EINPROGRESS.
 */
static struct shash_desc *cryptd_hash_prepare(struct ahash_request *req,
					      int err)
{
	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);

	req->base.complete = rctx->complete;
	req->base.data = rctx->data;

	if (unlikely(err == -EINPROGRESS))
		return NULL;

	return &rctx->desc;
}

/*
 * Second half of a hash worker: report @err to the caller, then either
 * re-arm the request (for -EINPROGRESS) or drop the reference taken at
 * enqueue time.
 */
static void cryptd_hash_complete(struct ahash_request *req, int err,
				 crypto_completion_t complete)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	/* Sampled before the caller's completion callback runs. */
	int refcnt = refcount_read(&ctx->refcnt);

	local_bh_disable();
	ahash_request_complete(req, err);
	local_bh_enable();

	if (err == -EINPROGRESS) {
		/*
		 * Reinstall the worker entry point. The caller's callback
		 * is still held in the request context, so nothing needs
		 * to be parked again here.
		 */
		req->base.complete = complete;
		req->base.data = req;
	} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
		crypto_free_ahash(tfm);
}

/* Worker entry point for init: bind the descriptor to the child, init it. */
static void cryptd_hash_init(void *data, int err)
{
	struct ahash_request *req = data;
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_shash *child = ctx->child;
	struct shash_desc *desc;

	desc = cryptd_hash_prepare(req, err);
	if (unlikely(!desc))
		goto out;

	desc->tfm = child;

	err = crypto_shash_init(desc);

out:
	cryptd_hash_complete(req, err, cryptd_hash_init);
}

/* .init hook: queue the request for cryptd_hash_init(). */
static int cryptd_hash_init_enqueue(struct ahash_request *req)
{
	return cryptd_hash_enqueue(req, cryptd_hash_init);
}

/*
 * Worker entry point for update. desc->tfm is not assigned here; in
 * this file only the init, digest and import paths set it.
 */
static void cryptd_hash_update(void *data, int err)
{
	struct ahash_request *req = data;
	struct shash_desc *desc;

	desc = cryptd_hash_prepare(req, err);
	if (likely(desc))
		err = shash_ahash_update(req, desc);

	cryptd_hash_complete(req, err, cryptd_hash_update);
}

/* .update hook: queue the request for cryptd_hash_update(). */
static int cryptd_hash_update_enqueue(struct ahash_request *req)
{
	return cryptd_hash_enqueue(req, cryptd_hash_update);
}

/* Worker entry point for final: write the digest to req->result. */
static void cryptd_hash_final(void *data, int err)
{
	struct ahash_request *req = data;
	struct shash_desc *desc;

	desc = cryptd_hash_prepare(req, err);
	if (likely(desc))
		err = crypto_shash_final(desc, req->result);

	cryptd_hash_complete(req, err, cryptd_hash_final);
}

/* .final hook: queue the request for cryptd_hash_final(). */
static int cryptd_hash_final_enqueue(struct ahash_request *req)
{
	return cryptd_hash_enqueue(req, cryptd_hash_final);
}

/* Worker entry point for finup. */
static void cryptd_hash_finup(void *data, int err)
{
	struct ahash_request *req = data;
	struct shash_desc *desc;

	desc = cryptd_hash_prepare(req, err);
	if (likely(desc))
		err = shash_ahash_finup(req, desc);

	cryptd_hash_complete(req, err, cryptd_hash_finup);
}

/* .finup hook: queue the request for cryptd_hash_finup(). */
static int cryptd_hash_finup_enqueue(struct ahash_request *req)
{
	return cryptd_hash_enqueue(req, cryptd_hash_finup);
}

/* Worker entry point for digest: bind the descriptor to the child first. */
static void cryptd_hash_digest(void *data, int err)
{
	struct ahash_request *req = data;
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	struct crypto_shash *child = ctx->child;
	struct shash_desc *desc;

	desc = cryptd_hash_prepare(req, err);
	if (unlikely(!desc))
		goto out;

	desc->tfm = child;

	err = shash_ahash_digest(req, desc);

out:
	cryptd_hash_complete(req, err, cryptd_hash_digest);
}

/* .digest hook: queue the request for cryptd_hash_digest(). */
static int cryptd_hash_digest_enqueue(struct ahash_request *req)
{
	return cryptd_hash_enqueue(req, cryptd_hash_digest);
}

/* .export hook: runs directly on the child descriptor, no queueing. */
static int cryptd_hash_export(struct ahash_request *req, void *out)
{
	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);

	return crypto_shash_export(&rctx->desc, out);
}

/*
 * .import hook: bind the descriptor to the child, then import into it
 * directly, no queueing.
 */
static int cryptd_hash_import(struct ahash_request *req, const void *in)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
	struct shash_desc *desc = cryptd_shash_desc(req);

	desc->tfm = ctx->child;

	return crypto_shash_import(desc, in);
}

/* Instance destructor: drop the spawn, then free the instance. */
static void cryptd_hash_free(struct ahash_instance *inst)
{
	struct hashd_instance_ctx *ctx = ahash_instance_ctx(inst);

	crypto_drop_shash(&ctx->spawn);
	kfree(inst);
}

/*
 * Build and register a cryptd ahash instance wrapping the shash named
 * in tb[1]. Returns 0 or a negative errno; on any failure after the
 * allocation the instance is released through cryptd_hash_free().
 */
static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
			      struct crypto_attr_type *algt,
			      struct cryptd_queue *queue)
{
	struct hashd_instance_ctx *ctx;
	struct ahash_instance *inst;
	struct shash_alg *alg;
	u32 type;
	u32 mask;
	int err;

	cryptd_type_and_mask(algt, &type, &mask);

	/* Instance and its context are carved out of one allocation. */
	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	ctx = ahash_instance_ctx(inst);
	ctx->queue = queue;

	err = crypto_grab_shash(&ctx->spawn, ahash_crypto_instance(inst),
				crypto_attr_alg_name(tb[1]), type, mask);
	if (err)
		goto err_free_inst;
	alg = crypto_spawn_shash_alg(&ctx->spawn);

	err = cryptd_init_instance(ahash_crypto_instance(inst), &alg->base);
	if (err)
		goto err_free_inst;

	/* Always async; INTERNAL and OPTIONAL_KEY follow the wrapped alg. */
	inst->alg.halg.base.cra_flags |= CRYPTO_ALG_ASYNC |
		(alg->base.cra_flags & (CRYPTO_ALG_INTERNAL|
					CRYPTO_ALG_OPTIONAL_KEY));
	inst->alg.halg.digestsize = alg->digestsize;
	inst->alg.halg.statesize = alg->statesize;
	inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);

	inst->alg.init_tfm = cryptd_hash_init_tfm;
	inst->alg.clone_tfm = cryptd_hash_clone_tfm;
	inst->alg.exit_tfm = cryptd_hash_exit_tfm;

	inst->alg.init   = cryptd_hash_init_enqueue;
	inst->alg.update = cryptd_hash_update_enqueue;
	inst->alg.final  = cryptd_hash_final_enqueue;
	inst->alg.finup  = cryptd_hash_finup_enqueue;
	inst->alg.export = cryptd_hash_export;
	inst->alg.import = cryptd_hash_import;
	/* Only expose setkey when the wrapped shash has one. */
	if (crypto_shash_alg_has_setkey(alg))
		inst->alg.setkey = cryptd_hash_setkey;
	inst->alg.digest = cryptd_hash_digest_enqueue;

	inst->free = cryptd_hash_free;

	err = ahash_register_instance(tmpl, inst);
	if (err) {
		/* Shared cleanup: the earlier failure paths jump in here. */
err_free_inst:
		cryptd_hash_free(inst);
	}
	return err;
}

/* setkey for the cryptd AEAD: forwarded unchanged to the child. */
static int cryptd_aead_setkey(struct crypto_aead *parent,
			      const u8 *key, unsigned int keylen)
{
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent);
	struct crypto_aead *child = ctx->child;

	return crypto_aead_setkey(child, key, keylen);
}

/* setauthsize for the cryptd AEAD: forwarded unchanged to the child. */
static int cryptd_aead_setauthsize(struct crypto_aead *parent,
				   unsigned int authsize)
{
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent);
	struct crypto_aead *child = ctx->child;

	return crypto_aead_setauthsize(child, authsize);
}

/*
 * Common body of the AEAD workers. Restores the caller's completion
 * callback, runs @crypt on a sub-request bound to @child (skipped when
 * @err is -EINPROGRESS), reports the result, and then either re-arms
 * the request with @compl or drops the reference taken at enqueue time.
 */
static void cryptd_aead_crypt(struct aead_request *req,
			      struct crypto_aead *child, int err,
			      int (*crypt)(struct aead_request *req),
			      crypto_completion_t compl)
{
	struct cryptd_aead_request_ctx *rctx;
	struct aead_request *subreq;
	struct cryptd_aead_ctx *ctx;
	struct crypto_aead *tfm;
	int refcnt;

	rctx = aead_request_ctx(req);
	subreq = &rctx->req;
	/* The enqueue path parked the caller's callback in the sub-request. */
	req->base.complete = subreq->base.complete;
	req->base.data = subreq->base.data;

	tfm = crypto_aead_reqtfm(req);

	if (unlikely(err == -EINPROGRESS))
		goto out;

	/* No callback on the sub-request; its return value is used directly. */
	aead_request_set_tfm(subreq, child);
	aead_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
				  NULL, NULL);
	aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
			       req->iv);
	aead_request_set_ad(subreq, req->assoclen);

	err = crypt(subreq);

out:
	ctx = crypto_aead_ctx(tfm);
	/* Sampled before the caller's completion callback runs. */
	refcnt = refcount_read(&ctx->refcnt);

	local_bh_disable();
	aead_request_complete(req, err);
	local_bh_enable();

	if (err == -EINPROGRESS) {
		/* Park the caller's callback again and re-arm the worker. */
		subreq->base.complete = req->base.complete;
		subreq->base.data = req->base.data;
		req->base.complete = compl;
		req->base.data = req;
	} else if (refcnt && refcount_dec_and_test(&ctx->refcnt))
		crypto_free_aead(tfm);
}

/* Worker entry point for AEAD encryption; calls the child's ->encrypt. */
static void cryptd_aead_encrypt(void *data, int err)
{
	struct aead_request *req = data;
	struct cryptd_aead_ctx *ctx;
	struct crypto_aead *child;

	ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
	child = ctx->child;
	cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->encrypt,
			  cryptd_aead_encrypt);
}

/* Worker entry point for AEAD decryption; calls the child's ->decrypt. */
static void cryptd_aead_decrypt(void *data, int err)
{
	struct aead_request *req = data;
	struct cryptd_aead_ctx *ctx;
	struct crypto_aead *child;

	ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
	child = ctx->child;
	cryptd_aead_crypt(req, child, err, crypto_aead_alg(child)->decrypt,
			  cryptd_aead_decrypt);
}

/*
 * Park the caller's completion callback/data in the sub-request,
 * install @compl as the request's callback (with the request itself as
 * data) and queue the request.
 */
static int cryptd_aead_enqueue(struct aead_request *req,
				    crypto_completion_t compl)
{
	struct cryptd_aead_request_ctx *rctx = aead_request_ctx(req);
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	struct cryptd_queue *queue = cryptd_get_queue(crypto_aead_tfm(tfm));
	struct aead_request *subreq = &rctx->req;

	subreq->base.complete = req->base.complete;
	subreq->base.data = req->base.data;
	req->base.complete = compl;
	req->base.data = req;
	return cryptd_enqueue_request(queue, &req->base);
}

/* .encrypt hook: queue the request for cryptd_aead_encrypt(). */
static int cryptd_aead_encrypt_enqueue(struct aead_request *req)
{
	return cryptd_aead_enqueue(req, cryptd_aead_encrypt );
}

/* .decrypt hook: queue the request for cryptd_aead_decrypt(). */
static int cryptd_aead_decrypt_enqueue(struct aead_request *req)
{
	return cryptd_aead_enqueue(req, cryptd_aead_decrypt );
}

/*
 * tfm init: instantiate the child AEAD and size the request context to
 * hold our wrapper plus the child's own request context.
 */
static int cryptd_aead_init_tfm(struct crypto_aead *tfm)
{
	struct aead_instance *inst = aead_alg_instance(tfm);
	struct aead_instance_ctx *ictx = aead_instance_ctx(inst);
	struct crypto_aead_spawn *spawn = &ictx->aead_spawn;
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm);
	struct crypto_aead *cipher;

	cipher = crypto_spawn_aead(spawn);
	if (IS_ERR(cipher))
		return PTR_ERR(cipher);

	ctx->child = cipher;
	crypto_aead_set_reqsize(
		tfm, sizeof(struct cryptd_aead_request_ctx) +
		     crypto_aead_reqsize(cipher));
	return 0;
}

/* tfm exit: release the child AEAD. */
static void cryptd_aead_exit_tfm(struct crypto_aead *tfm)
{
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm);
	crypto_free_aead(ctx->child);
}

/* Instance destructor: drop the spawn, then free the instance. */
static void cryptd_aead_free(struct aead_instance *inst)
{
	struct aead_instance_ctx *ctx = aead_instance_ctx(inst);

	crypto_drop_aead(&ctx->aead_spawn);
	kfree(inst);
}

/*
 * Build and register a cryptd instance wrapping the AEAD named in
 * tb[1]. Returns 0 or a negative errno; on any failure after the
 * allocation the instance is released through cryptd_aead_free().
 */
static int cryptd_create_aead(struct crypto_template *tmpl,
		              struct rtattr **tb,
			      struct crypto_attr_type *algt,
			      struct cryptd_queue *queue)
{
	struct aead_instance_ctx *ctx;
	struct aead_instance *inst;
	struct aead_alg *alg;
	u32 type;
	u32 mask;
	int err;

	cryptd_type_and_mask(algt, &type, &mask);

	/* Instance and its context are carved out of one allocation. */
	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	ctx = aead_instance_ctx(inst);
	ctx->queue = queue;

	err = crypto_grab_aead(&ctx->aead_spawn, aead_crypto_instance(inst),
			       crypto_attr_alg_name(tb[1]), type, mask);
	if (err)
		goto err_free_inst;

	alg = crypto_spawn_aead_alg(&ctx->aead_spawn);
	err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base);
	if (err)
		goto err_free_inst;

	/* Always async; internal only if the wrapped algorithm is. */
	inst->alg.base.cra_flags |= CRYPTO_ALG_ASYNC |
		(alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
	inst->alg.base.cra_ctxsize = sizeof(struct cryptd_aead_ctx);

	inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
	inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);

	inst->alg.init = cryptd_aead_init_tfm;
	inst->alg.exit = cryptd_aead_exit_tfm;
	inst->alg.setkey = cryptd_aead_setkey;
	inst->alg.setauthsize = cryptd_aead_setauthsize;
	inst->alg.encrypt = cryptd_aead_encrypt_enqueue;
	inst->alg.decrypt = cryptd_aead_decrypt_enqueue;

	inst->free = cryptd_aead_free;

	err = aead_register_instance(tmpl, inst);
	if (err) {
		/* Shared cleanup: the earlier failure paths jump in here. */
err_free_inst:
		cryptd_aead_free(inst);
	}
	return err;
}

/* The single queue shared by every cryptd instance. */
static struct cryptd_queue queue;

/*
 * Template ->create hook: dispatch on the requested algorithm type to
 * the matching constructor. Unhandled types yield -EINVAL.
 */
static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct crypto_attr_type *algt;

	algt = crypto_get_attr_type(tb);
	if (IS_ERR(algt))
		return PTR_ERR(algt);

	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
	case CRYPTO_ALG_TYPE_LSKCIPHER:
		return cryptd_create_skcipher(tmpl, tb, algt, &queue);
	case CRYPTO_ALG_TYPE_HASH:
		return cryptd_create_hash(tmpl, tb, algt, &queue);
	case CRYPTO_ALG_TYPE_AEAD:
		return cryptd_create_aead(tmpl, tb, algt, &queue);
	}

	return -EINVAL;
}

static struct crypto_template cryptd_tmpl = {
	.name = "cryptd",
	.create = cryptd_create,
	.module = THIS_MODULE,
};

/*
 * Allocate a "cryptd(<alg_name>)" skcipher and start its reference
 * count at 1. Returns ERR_PTR(-EINVAL) if the name does not fit or the
 * algorithm found is not provided by this module, or the allocation
 * error otherwise.
 */
struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
					      u32 type, u32 mask)
{
	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
	struct cryptd_skcipher_ctx *ctx;
	struct crypto_skcipher *tfm;

	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
		return ERR_PTR(-EINVAL);

	tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask);
	if (IS_ERR(tfm))
		return ERR_CAST(tfm);

	/* The context layout is only known for algorithms from this module. */
	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
		crypto_free_skcipher(tfm);
		return ERR_PTR(-EINVAL);
	}

	ctx = crypto_skcipher_ctx(tfm);
	refcount_set(&ctx->refcnt, 1);

	return container_of(tfm, struct cryptd_skcipher, base);
}
EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher);

/* Return the wrapped (child) skcipher. */
struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
{
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);

	return ctx->child;
}
EXPORT_SYMBOL_GPL(cryptd_skcipher_child);

/*
 * True when the reference count differs from 1, i.e. references beyond
 * the one set by cryptd_alloc_skcipher() are held (the enqueue path
 * takes one per queued request).
 */
bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm)
{
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);

	return refcount_read(&ctx->refcnt) - 1;
}
EXPORT_SYMBOL_GPL(cryptd_skcipher_queued);

/* Drop one reference; the tfm is freed when the last one goes away. */
void cryptd_free_skcipher(struct cryptd_skcipher *tfm)
{
	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);

	if (refcount_dec_and_test(&ctx->refcnt))
		crypto_free_skcipher(&tfm->base);
}
EXPORT_SYMBOL_GPL(cryptd_free_skcipher);

/*
 * Allocate a "cryptd(<alg_name>)" ahash and start its reference count
 * at 1. Returns ERR_PTR(-EINVAL) if the name does not fit or the
 * algorithm found is not provided by this module, or the allocation
 * error otherwise.
 */
struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
					u32 type, u32 mask)
{
	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
	struct cryptd_hash_ctx *ctx;
	struct crypto_ahash *tfm;

	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
		return ERR_PTR(-EINVAL);
	tfm = crypto_alloc_ahash(cryptd_alg_name, type, mask);
	if (IS_ERR(tfm))
		return ERR_CAST(tfm);
	/* The context layout is only known for algorithms from this module. */
	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
		crypto_free_ahash(tfm);
		return ERR_PTR(-EINVAL);
	}

	ctx = crypto_ahash_ctx(tfm);
	refcount_set(&ctx->refcnt, 1);

	return __cryptd_ahash_cast(tfm);
}
EXPORT_SYMBOL_GPL(cryptd_alloc_ahash);

/* Return the wrapped (child) shash. */
struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm)
{
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);

	return ctx->child;
}
EXPORT_SYMBOL_GPL(cryptd_ahash_child);

/* Return the child descriptor embedded in @req's request context. */
struct shash_desc *cryptd_shash_desc(struct ahash_request *req)
{
	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
	return &rctx->desc;
}
EXPORT_SYMBOL_GPL(cryptd_shash_desc);

/*
 * True when the reference count differs from 1, i.e. references beyond
 * the one set by cryptd_alloc_ahash() are held.
 */
bool cryptd_ahash_queued(struct cryptd_ahash *tfm)
{
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);

	return refcount_read(&ctx->refcnt) - 1;
}
EXPORT_SYMBOL_GPL(cryptd_ahash_queued);

/* Drop one reference; the tfm is freed when the last one goes away. */
void cryptd_free_ahash(struct cryptd_ahash *tfm)
{
	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);

	if (refcount_dec_and_test(&ctx->refcnt))
		crypto_free_ahash(&tfm->base);
}
EXPORT_SYMBOL_GPL(cryptd_free_ahash);

/*
 * Allocate a "cryptd(<alg_name>)" AEAD and start its reference count at
 * 1. Returns ERR_PTR(-EINVAL) if the name does not fit or the algorithm
 * found is not provided by this module, or the allocation error
 * otherwise.
 */
struct cryptd_aead *cryptd_alloc_aead(const char *alg_name,
						  u32 type, u32 mask)
{
	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
	struct cryptd_aead_ctx *ctx;
	struct crypto_aead *tfm;

	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
		return ERR_PTR(-EINVAL);
	tfm = crypto_alloc_aead(cryptd_alg_name, type, mask);
	if (IS_ERR(tfm))
		return ERR_CAST(tfm);
	/* The context layout is only known for algorithms from this module. */
	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
		crypto_free_aead(tfm);
		return ERR_PTR(-EINVAL);
	}

	ctx = crypto_aead_ctx(tfm);
	refcount_set(&ctx->refcnt, 1);

	return __cryptd_aead_cast(tfm);
}
EXPORT_SYMBOL_GPL(cryptd_alloc_aead);

/* Return the wrapped (child) AEAD. */
struct crypto_aead *cryptd_aead_child(struct cryptd_aead *tfm)
{
	struct cryptd_aead_ctx *ctx;
	ctx = crypto_aead_ctx(&tfm->base);
	return ctx->child;
}
EXPORT_SYMBOL_GPL(cryptd_aead_child);

/*
 * True when the reference count differs from 1, i.e. references beyond
 * the one set by cryptd_alloc_aead() are held.
 */
bool cryptd_aead_queued(struct cryptd_aead *tfm)
{
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base);

	return refcount_read(&ctx->refcnt) - 1;
}
EXPORT_SYMBOL_GPL(cryptd_aead_queued);

/* Drop one reference; the tfm is freed when the last one goes away. */
void cryptd_free_aead(struct cryptd_aead *tfm)
{
	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base);

	if (refcount_dec_and_test(&ctx->refcnt))
		crypto_free_aead(&tfm->base);
}
EXPORT_SYMBOL_GPL(cryptd_free_aead);

/*
 * Module init: create the workqueue, set up the per-CPU queues, then
 * register the template. Each failure unwinds the steps already done.
 */
static int __init cryptd_init(void)
{
	int err;

	cryptd_wq = alloc_workqueue("cryptd", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
				    1);
	if (!cryptd_wq)
		return -ENOMEM;

	err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen);
	if (err)
		goto err_destroy_wq;

	err = crypto_register_template(&cryptd_tmpl);
	if (err)
		goto err_fini_queue;

	return 0;

err_fini_queue:
	cryptd_fini_queue(&queue);
err_destroy_wq:
	destroy_workqueue(cryptd_wq);
	return err;
}

/*
 * Module exit.
 * NOTE(review): the template is unregistered last, which is not the
 * reverse of cryptd_init(). Presumably safe because the module cannot
 * be unloaded while instances are in use - confirm.
 */
static void __exit cryptd_exit(void)
{
	destroy_workqueue(cryptd_wq);
	cryptd_fini_queue(&queue);
	crypto_unregister_template(&cryptd_tmpl);
}

module_init(cryptd_init);
module_exit(cryptd_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Software async crypto daemon");
MODULE_ALIAS_CRYPTO("cryptd");
35 10 1 19 72 16 17 12 7 13 13 259 9 4 2 19 4 54 1 53 99 139 139 140 140 4 140 24 31 67 22 1 3 4 21 21 21 20 21 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ #ifndef _LINUX_SKMSG_H #define _LINUX_SKMSG_H #include <linux/bpf.h> #include <linux/filter.h> #include <linux/scatterlist.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp.h> #include <net/strparser.h> #define MAX_MSG_FRAGS MAX_SKB_FRAGS #define NR_MSG_FRAG_IDS (MAX_MSG_FRAGS + 1) enum __sk_action { __SK_DROP = 0, __SK_PASS, __SK_REDIRECT, __SK_NONE, }; struct sk_msg_sg { u32 start; u32 curr; u32 end; u32 size; u32 copybreak; DECLARE_BITMAP(copy, MAX_MSG_FRAGS + 2); /* The extra two elements: * 1) used for chaining the front and sections when the list becomes * partitioned (e.g. end < start). The crypto APIs require the * chaining; * 2) to chain tailer SG entries after the message. */ struct scatterlist data[MAX_MSG_FRAGS + 2]; }; /* UAPI in filter.c depends on struct sk_msg_sg being first element. 
*/ struct sk_msg { struct sk_msg_sg sg; void *data; void *data_end; u32 apply_bytes; u32 cork_bytes; u32 flags; struct sk_buff *skb; struct sock *sk_redir; struct sock *sk; struct list_head list; }; struct sk_psock_progs { struct bpf_prog *msg_parser; struct bpf_prog *stream_parser; struct bpf_prog *stream_verdict; struct bpf_prog *skb_verdict; struct bpf_link *msg_parser_link; struct bpf_link *stream_parser_link; struct bpf_link *stream_verdict_link; struct bpf_link *skb_verdict_link; }; enum sk_psock_state_bits { SK_PSOCK_TX_ENABLED, SK_PSOCK_RX_STRP_ENABLED, }; struct sk_psock_link { struct list_head list; struct bpf_map *map; void *link_raw; }; struct sk_psock_work_state { u32 len; u32 off; }; struct sk_psock { struct sock *sk; struct sock *sk_redir; u32 apply_bytes; u32 cork_bytes; u32 eval; bool redir_ingress; /* undefined if sk_redir is null */ struct sk_msg *cork; struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; u32 copied_seq; u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; spinlock_t ingress_lock; unsigned long state; struct list_head link; spinlock_t link_lock; refcount_t refcnt; void (*saved_unhash)(struct sock *sk); void (*saved_destroy)(struct sock *sk); void (*saved_close)(struct sock *sk, long timeout); void (*saved_write_space)(struct sock *sk); void (*saved_data_ready)(struct sock *sk); /* psock_update_sk_prot may be called with restore=false many times * so the handler must be safe for this case. It will be called * exactly once with restore=true when the psock is being destroyed * and psock refcnt is zero, but before an RCU grace period. 
*/ int (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock, bool restore); struct proto *sk_proto; struct mutex work_mutex; struct sk_psock_work_state work_state; struct delayed_work work; struct sock *sk_pair; struct rcu_work rwork; }; int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, int elem_first_coalesce); int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src, u32 off, u32 len); void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len); int sk_msg_free(struct sock *sk, struct sk_msg *msg); int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg); void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes); void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg, u32 bytes); void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes); void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes); int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { WARN_ON(i == msg->sg.end && bytes); } static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes) { if (psock->apply_bytes) { if (psock->apply_bytes < bytes) psock->apply_bytes = 0; else psock->apply_bytes -= bytes; } } static inline u32 sk_msg_iter_dist(u32 start, u32 end) { return end >= start ? 
end - start : end + (NR_MSG_FRAG_IDS - start); } #define sk_msg_iter_var_prev(var) \ do { \ if (var == 0) \ var = NR_MSG_FRAG_IDS - 1; \ else \ var--; \ } while (0) #define sk_msg_iter_var_next(var) \ do { \ var++; \ if (var == NR_MSG_FRAG_IDS) \ var = 0; \ } while (0) #define sk_msg_iter_prev(msg, which) \ sk_msg_iter_var_prev(msg->sg.which) #define sk_msg_iter_next(msg, which) \ sk_msg_iter_var_next(msg->sg.which) static inline void sk_msg_init(struct sk_msg *msg) { BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != NR_MSG_FRAG_IDS); memset(msg, 0, sizeof(*msg)); sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS); } static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src, int which, u32 size) { dst->sg.data[which] = src->sg.data[which]; dst->sg.data[which].length = size; dst->sg.size += size; src->sg.size -= size; src->sg.data[which].length -= size; src->sg.data[which].offset += size; } static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src) { memcpy(dst, src, sizeof(*src)); sk_msg_init(src); } static inline bool sk_msg_full(const struct sk_msg *msg) { return sk_msg_iter_dist(msg->sg.start, msg->sg.end) == MAX_MSG_FRAGS; } static inline u32 sk_msg_elem_used(const struct sk_msg *msg) { return sk_msg_iter_dist(msg->sg.start, msg->sg.end); } static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) { return &msg->sg.data[which]; } static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) { return msg->sg.data[which]; } static inline struct page *sk_msg_page(struct sk_msg *msg, int which) { return sg_page(sk_msg_elem(msg, which)); } static inline bool sk_msg_to_ingress(const struct sk_msg *msg) { return msg->flags & BPF_F_INGRESS; } static inline void sk_msg_compute_data_pointers(struct sk_msg *msg) { struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start); if (test_bit(msg->sg.start, msg->sg.copy)) { msg->data = NULL; msg->data_end = NULL; } else { msg->data = sg_virt(sge); msg->data_end = msg->data + 
sge->length; } } static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page, u32 len, u32 offset) { struct scatterlist *sge; get_page(page); sge = sk_msg_elem(msg, msg->sg.end); sg_set_page(sge, page, len, offset); sg_unmark_end(sge); __set_bit(msg->sg.end, msg->sg.copy); msg->sg.size += len; sk_msg_iter_next(msg, end); } static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state) { do { if (copy_state) __set_bit(i, msg->sg.copy); else __clear_bit(i, msg->sg.copy); sk_msg_iter_var_next(i); if (i == msg->sg.end) break; } while (1); } static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start) { sk_msg_sg_copy(msg, start, true); } static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start) { sk_msg_sg_copy(msg, start, false); } static inline struct sk_psock *sk_psock(const struct sock *sk) { return __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_PSOCK); } static inline void sk_psock_set_state(struct sk_psock *psock, enum sk_psock_state_bits bit) { set_bit(bit, &psock->state); } static inline void sk_psock_clear_state(struct sk_psock *psock, enum sk_psock_state_bits bit) { clear_bit(bit, &psock->state); } static inline bool sk_psock_test_state(const struct sk_psock *psock, enum sk_psock_state_bits bit) { return test_bit(bit, &psock->state); } static inline void sock_drop(struct sock *sk, struct sk_buff *skb) { sk_drops_add(sk, skb); kfree_skb(skb); } static inline bool sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { bool ret; spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { list_add_tail(&msg->list, &psock->ingress_msg); ret = true; } else { sk_msg_free(psock->sk, msg); kfree(msg); ret = false; } spin_unlock_bh(&psock->ingress_lock); return ret; } static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) { struct sk_msg *msg; spin_lock_bh(&psock->ingress_lock); msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, 
list); if (msg) list_del(&msg->list); spin_unlock_bh(&psock->ingress_lock); return msg; } static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock) { struct sk_msg *msg; spin_lock_bh(&psock->ingress_lock); msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); spin_unlock_bh(&psock->ingress_lock); return msg; } static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock, struct sk_msg *msg) { struct sk_msg *ret; spin_lock_bh(&psock->ingress_lock); if (list_is_last(&msg->list, &psock->ingress_msg)) ret = NULL; else ret = list_next_entry(msg, list); spin_unlock_bh(&psock->ingress_lock); return ret; } static inline bool sk_psock_queue_empty(const struct sk_psock *psock) { return psock ? list_empty(&psock->ingress_msg) : true; } static inline void kfree_sk_msg(struct sk_msg *msg) { if (msg->skb) consume_skb(msg->skb); kfree(msg); } static inline void sk_psock_report_error(struct sk_psock *psock, int err) { struct sock *sk = psock->sk; sk->sk_err = err; sk_error_report(sk); } struct sk_psock *sk_psock_init(struct sock *sk, int node); void sk_psock_stop(struct sk_psock *psock); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock); #else static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) { return -EOPNOTSUPP; } static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) { } static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) { } #endif void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock); void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); /* * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate 
alloc_tag). The typecast is * intentional to enforce typesafety. */ #define sk_psock_init_link() \ ((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \ GFP_ATOMIC | __GFP_NOWARN)) static inline void sk_psock_free_link(struct sk_psock_link *link) { kfree(link); } struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock); static inline void sk_psock_cork_free(struct sk_psock *psock) { if (psock->cork) { sk_msg_free(psock->sk, psock->cork); kfree(psock->cork); psock->cork = NULL; } } static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { if (psock->psock_update_sk_prot) psock->psock_update_sk_prot(sk, psock, true); } static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; rcu_read_lock(); psock = sk_psock(sk); if (psock && !refcount_inc_not_zero(&psock->refcnt)) psock = NULL; rcu_read_unlock(); return psock; } void sk_psock_drop(struct sock *sk, struct sk_psock *psock); static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) { if (refcount_dec_and_test(&psock->refcnt)) sk_psock_drop(sk, psock); } static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, struct bpf_prog *prog) { prog = xchg(pprog, prog); if (prog) bpf_prog_put(prog); } static inline int psock_replace_prog(struct bpf_prog **pprog, struct bpf_prog *prog, struct bpf_prog *old) { if (cmpxchg(pprog, old, prog) != old) return -ENOENT; if (old) bpf_prog_put(old); return 0; } static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); psock_set_prog(&progs->stream_parser, NULL); psock_set_prog(&progs->stream_verdict, NULL); psock_set_prog(&progs->skb_verdict, NULL); } int sk_psock_tls_strp_read(struct sk_psock *psock, struct 
sk_buff *skb); static inline bool sk_psock_strp_enabled(struct sk_psock *psock) { if (!psock) return false; return !!psock->saved_data_ready; } #if IS_ENABLED(CONFIG_NET_SOCK_MSG) #define BPF_F_STRPARSER (1UL << 1) /* We only have two bits so far. */ #define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) static inline bool skb_bpf_strparser(const struct sk_buff *skb) { unsigned long sk_redir = skb->_sk_redir; return sk_redir & BPF_F_STRPARSER; } static inline void skb_bpf_set_strparser(struct sk_buff *skb) { skb->_sk_redir |= BPF_F_STRPARSER; } static inline bool skb_bpf_ingress(const struct sk_buff *skb) { unsigned long sk_redir = skb->_sk_redir; return sk_redir & BPF_F_INGRESS; } static inline void skb_bpf_set_ingress(struct sk_buff *skb) { skb->_sk_redir |= BPF_F_INGRESS; } static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir, bool ingress) { skb->_sk_redir = (unsigned long)sk_redir; if (ingress) skb->_sk_redir |= BPF_F_INGRESS; } static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb) { unsigned long sk_redir = skb->_sk_redir; return (struct sock *)(sk_redir & BPF_F_PTR_MASK); } static inline void skb_bpf_redirect_clear(struct sk_buff *skb) { skb->_sk_redir = 0; } #endif /* CONFIG_NET_SOCK_MSG */ #endif /* _LINUX_SKMSG_H */
1980 1120 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NAMEI_H #define _LINUX_NAMEI_H #include <linux/fs.h> #include <linux/kernel.h> #include <linux/path.h> #include <linux/fcntl.h> #include <linux/errno.h> enum { MAX_NESTED_LINKS = 8 }; #define MAXSYMLINKS 40 /* * Type of the last component on LOOKUP_PARENT */ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ #define LOOKUP_FOLLOW BIT(0) /* follow links at the end */ #define LOOKUP_DIRECTORY BIT(1) /* require a directory */ #define LOOKUP_AUTOMOUNT BIT(2) /* force terminal automount */ #define LOOKUP_EMPTY BIT(3) /* accept empty path [user_... only] */ #define LOOKUP_LINKAT_EMPTY BIT(4) /* Linkat request with empty path. */ #define LOOKUP_DOWN BIT(5) /* follow mounts in the starting point */ #define LOOKUP_MOUNTPOINT BIT(6) /* follow mounts in the end */ #define LOOKUP_REVAL BIT(7) /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_RCU BIT(8) /* RCU pathwalk mode; semi-internal */ #define LOOKUP_CACHED BIT(9) /* Only do cached lookup */ #define LOOKUP_PARENT BIT(10) /* Looking up final parent in path */ /* 5 spare bits for pathwalk */ /* These tell filesystem methods that we are dealing with the final component... */ #define LOOKUP_OPEN BIT(16) /* ... in open */ #define LOOKUP_CREATE BIT(17) /* ... in object creation */ #define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ #define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ /* 4 spare bits for intent */ /* Scoping flags for lookup. 
*/ #define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */ #define LOOKUP_NO_MAGICLINKS BIT(25) /* No nd_jump_link() crossing. */ #define LOOKUP_NO_XDEV BIT(26) /* No mountpoint crossing. */ #define LOOKUP_BENEATH BIT(27) /* No escaping from starting point. */ #define LOOKUP_IN_ROOT BIT(28) /* Treat dirfd as fs root. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) /* 3 spare bits for scoping */ extern int path_pts(struct path *path); extern int user_path_at(int, const char __user *, unsigned, struct path *); struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root); int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *); extern struct dentry *lookup_noperm(struct qstr *, struct dentry *); extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *); extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *); struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); struct dentry 
*lookup_one_positive_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); /** * mode_strip_umask - handle vfs umask stripping * @dir: parent directory of the new inode * @mode: mode of the new inode to be created in @dir * * In most filesystems, umask stripping depends on whether or not the * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask * stripping is done directly in here. If the filesystem does support POSIX * ACLs umask stripping is deferred until the filesystem calls * posix_acl_create(). * * Some filesystems (like NFSv4) also want to avoid umask stripping by the * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK * to get this effect without declaring that they support POSIX ACLs. * * Returns: mode */ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode) { if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK)) mode &= ~current_umask(); return mode; } extern int __must_check nd_jump_link(const struct path *path); static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { ((char *) name)[min(len, maxlen)] = '\0'; } /** * retry_estale - determine whether the caller should retry an operation * @error: the error that would currently be returned * @flags: flags being used for next lookup attempt * * Check to see if the error code was -ESTALE, and then determine whether * to retry the call based on whether "flags" already has LOOKUP_REVAL set. * * Returns true if the caller should try the operation again. 
*/ static inline bool retry_estale(const long error, const unsigned int flags) { return unlikely(error == -ESTALE && !(flags & LOOKUP_REVAL)); } #endif /* _LINUX_NAMEI_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000,2005 Silicon Graphics, Inc. * All Rights Reserved. */ #ifndef __XFS_IALLOC_H__ #define __XFS_IALLOC_H__ struct xfs_buf; struct xfs_dinode; struct xfs_imap; struct xfs_mount; struct xfs_trans; struct xfs_btree_cur; struct xfs_perag; /* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 struct xfs_icluster { bool deleted; /* record is deleted */ xfs_ino_t first_ino; /* first inode number */ uint64_t alloc; /* inode phys. allocation bitmap for * sparse chunks */ }; /* * Make an inode pointer out of the buffer/offset. */ static inline struct xfs_dinode * xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) { return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog); } struct xfs_icreate_args; /* * Allocate an inode on disk. Mode is used to tell whether the new inode will * need space, and whether it is a directory. */ int xfs_dialloc(struct xfs_trans **tpp, const struct xfs_icreate_args *args, xfs_ino_t *new_ino); int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag, xfs_ino_t ino, struct xfs_icluster *ifree); /* * Return the location of the inode in imap, for mapping it into a buffer. 
*/ int xfs_imap( struct xfs_perag *pag, struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ struct xfs_imap *imap, /* location map structure */ uint flags); /* flags for inode btree lookup */ /* * Log specified fields for the ag hdr (inode section) */ void xfs_ialloc_log_agi( struct xfs_trans *tp, /* transaction pointer */ struct xfs_buf *bp, /* allocation group header buffer */ uint32_t fields); /* bitmask of fields to log */ int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, xfs_buf_flags_t flags, struct xfs_buf **agibpp); int xfs_ialloc_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, int flags, struct xfs_buf **agibpp); #define XFS_IALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */ /* * Lookup a record by ino in the btree given by cur. */ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, xfs_lookup_t dir, int *stat); /* * Get the data from the pointed-to record. */ int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_inobt_rec_incore_t *rec, int *stat); uint8_t xfs_inobt_rec_freecount(const struct xfs_inobt_rec_incore *irec); /* * Inode chunk initialisation routine */ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, struct list_head *buffer_list, int icount, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen); union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, const union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); xfs_failaddr_t xfs_inobt_check_irec(struct xfs_perag *pag, const struct xfs_inobt_rec_incore *irec); int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, enum xbtree_recpacking *outcome); int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count, xfs_agino_t *freecount); int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask, uint8_t count, int32_t freecount, xfs_inofree_t free, int *stat); int 
xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp); xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); int xfs_ialloc_check_shrink(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agibp, xfs_agblock_t new_length); #endif /* __XFS_IALLOC_H__ */
27 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include "timers.h" #include "device.h" #include "peer.h" #include "queueing.h" #include "socket.h" /* * - Timer for retransmitting the handshake if we don't hear back after * `REKEY_TIMEOUT + jitter` ms. * * - Timer for sending empty packet if we have received a packet but after have * not sent one for `KEEPALIVE_TIMEOUT` ms. * * - Timer for initiating new handshake if we have sent a packet but after have * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + * jitter` ms. * * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms * if no new keys have been received. * * - Timer for, if enabled, sending an empty authenticated packet every user- * specified seconds. 
*/ static inline void mod_peer_timer(struct wg_peer *peer, struct timer_list *timer, unsigned long expires) { rcu_read_lock_bh(); if (likely(netif_running(peer->device->dev) && !READ_ONCE(peer->is_dead))) mod_timer(timer, expires); rcu_read_unlock_bh(); } static void wg_expired_retransmit_handshake(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_retransmit_handshake); if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2); timer_delete(&peer->timer_send_keepalive); /* We drop all packets without a keypair and don't try again, * if we try unsuccessfully for too long to make a handshake. */ wg_packet_purge_staged_packets(peer); /* We set a timer for destroying any residue that might be left * of a partial exchange. */ if (!timer_pending(&peer->timer_zero_key_material)) mod_peer_timer(peer, &peer->timer_zero_key_material, jiffies + REJECT_AFTER_TIME * 3 * HZ); } else { ++peer->timer_handshake_attempts; pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)REKEY_TIMEOUT, peer->timer_handshake_attempts + 1); /* We clear the endpoint address src address, in case this is * the cause of trouble. 
*/ wg_socket_clear_peer_endpoint_src(peer); wg_packet_send_queued_handshake_initiation(peer, true); } } static void wg_expired_send_keepalive(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_send_keepalive); wg_packet_send_keepalive(peer); if (peer->timer_need_another_keepalive) { peer->timer_need_another_keepalive = false; mod_peer_timer(peer, &peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT * HZ); } } static void wg_expired_new_handshake(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_new_handshake); pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)); /* We clear the endpoint address src address, in case this is the cause * of trouble. */ wg_socket_clear_peer_endpoint_src(peer); wg_packet_send_queued_handshake_initiation(peer, false); } static void wg_expired_zero_key_material(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_zero_key_material); rcu_read_lock_bh(); if (!READ_ONCE(peer->is_dead)) { wg_peer_get(peer); if (!queue_work(peer->device->handshake_send_wq, &peer->clear_peer_work)) /* If the work was already on the queue, we want to drop * the extra reference. 
*/ wg_peer_put(peer); } rcu_read_unlock_bh(); } static void wg_queued_expired_zero_key_material(struct work_struct *work) { struct wg_peer *peer = container_of(work, struct wg_peer, clear_peer_work); pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); wg_peer_put(peer); } static void wg_expired_send_persistent_keepalive(struct timer_list *timer) { struct wg_peer *peer = timer_container_of(peer, timer, timer_persistent_keepalive); if (likely(peer->persistent_keepalive_interval)) wg_packet_send_keepalive(peer); } /* Should be called after an authenticated data packet is sent. */ void wg_timers_data_sent(struct wg_peer *peer) { if (!timer_pending(&peer->timer_new_handshake)) mod_peer_timer(peer, &peer->timer_new_handshake, jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ + get_random_u32_below(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); } /* Should be called after an authenticated data packet is received. */ void wg_timers_data_received(struct wg_peer *peer) { if (likely(netif_running(peer->device->dev))) { if (!timer_pending(&peer->timer_send_keepalive)) mod_peer_timer(peer, &peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT * HZ); else peer->timer_need_another_keepalive = true; } } /* Should be called after any type of authenticated packet is sent, whether * keepalive, data, or handshake. */ void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer) { timer_delete(&peer->timer_send_keepalive); } /* Should be called after any type of authenticated packet is received, whether * keepalive, data, or handshake. */ void wg_timers_any_authenticated_packet_received(struct wg_peer *peer) { timer_delete(&peer->timer_new_handshake); } /* Should be called after a handshake initiation message is sent. 
*/ void wg_timers_handshake_initiated(struct wg_peer *peer) { mod_peer_timer(peer, &peer->timer_retransmit_handshake, jiffies + REKEY_TIMEOUT * HZ + get_random_u32_below(REKEY_TIMEOUT_JITTER_MAX_JIFFIES)); } /* Should be called after a handshake response message is received and processed * or when getting key confirmation via the first data message. */ void wg_timers_handshake_complete(struct wg_peer *peer) { timer_delete(&peer->timer_retransmit_handshake); peer->timer_handshake_attempts = 0; peer->sent_lastminute_handshake = false; ktime_get_real_ts64(&peer->walltime_last_handshake); } /* Should be called after an ephemeral key is created, which is before sending a * handshake response or after receiving a handshake response. */ void wg_timers_session_derived(struct wg_peer *peer) { mod_peer_timer(peer, &peer->timer_zero_key_material, jiffies + REJECT_AFTER_TIME * 3 * HZ); } /* Should be called before a packet with authentication, whether * keepalive, data, or handshakem is sent, or after one is received. 
*/ void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) { if (peer->persistent_keepalive_interval) mod_peer_timer(peer, &peer->timer_persistent_keepalive, jiffies + peer->persistent_keepalive_interval * HZ); } void wg_timers_init(struct wg_peer *peer) { timer_setup(&peer->timer_retransmit_handshake, wg_expired_retransmit_handshake, 0); timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); timer_setup(&peer->timer_zero_key_material, wg_expired_zero_key_material, 0); timer_setup(&peer->timer_persistent_keepalive, wg_expired_send_persistent_keepalive, 0); INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); peer->timer_handshake_attempts = 0; peer->sent_lastminute_handshake = false; peer->timer_need_another_keepalive = false; } void wg_timers_stop(struct wg_peer *peer) { timer_delete_sync(&peer->timer_retransmit_handshake); timer_delete_sync(&peer->timer_send_keepalive); timer_delete_sync(&peer->timer_new_handshake); timer_delete_sync(&peer->timer_zero_key_material); timer_delete_sync(&peer->timer_persistent_keepalive); flush_work(&peer->clear_peer_work); }
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2022 Pablo Neira Ayuso <pablo@netfilter.org>
 */

#include <linux/kernel.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_meta.h>
#include <net/netfilter/nf_tables_offload.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/gre.h>
#include <net/geneve.h>
#include <net/ip.h>
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

/* Per-CPU copy of the most recently saved tunnel context, with its lock. */
struct nft_inner_tun_ctx_locked {
	struct nft_inner_tun_ctx ctx;
	local_lock_t bh_lock;
};

static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};

/* Same layout as nft_expr but it embeds the private expression data area. */
struct __nft_expr {
	const struct nft_expr_ops	*ops;
	union {
		struct nft_payload	payload;
		struct nft_meta		meta;
	} __attribute__((aligned(__alignof__(u64))));
};

/* Which kind of expression is embedded in struct nft_inner. */
enum {
	NFT_INNER_EXPR_PAYLOAD,
	NFT_INNER_EXPR_META,
};

/* Private data of the "inner" expression. */
struct nft_inner {
	u8			flags;		/* NFT_INNER_* flags from userspace */
	u8			hdrsize;	/* tunnel header size in bytes */
	u8			type;		/* tunnel type, e.g. NFT_INNER_GENEVE */
	u8			expr_type;	/* NFT_INNER_EXPR_* */

	struct __nft_expr	expr;		/* embedded payload/meta expression */
};

/*
 * Locate the inner link-layer (optional), network and transport headers of
 * the encapsulated packet, starting at byte offset @off into pkt->skb, and
 * record the offsets found plus the matching NFT_PAYLOAD_CTX_INNER_* flags
 * in @ctx.
 *
 * Returns 0 on success, -1 if a header cannot be read or the protocol is not
 * one of the handled ones (IPv4, IPv6, optionally behind Ethernet/802.1Q).
 */
static int nft_inner_parse_l2l3(const struct nft_inner *priv,
				const struct nft_pktinfo *pkt,
				struct nft_inner_tun_ctx *ctx, u32 off)
{
	__be16 llproto, outer_llproto;
	u32 nhoff, thoff;

	if (priv->flags & NFT_INNER_LL) {
		struct vlan_ethhdr *veth, _veth;
		struct ethhdr *eth, _eth;
		u32 hdrsize;

		eth = skb_header_pointer(pkt->skb, off, sizeof(_eth), &_eth);
		if (!eth)
			return -1;

		switch (eth->h_proto) {
		case htons(ETH_P_IP):
		case htons(ETH_P_IPV6):
			llproto = eth->h_proto;
			hdrsize = sizeof(_eth);
			break;
		case htons(ETH_P_8021Q):
			veth = skb_header_pointer(pkt->skb, off, sizeof(_veth), &_veth);
			if (!veth)
				return -1;

			/* Despite its name, outer_llproto holds the protocol
			 * carried inside the VLAN tag; llproto keeps the VLAN
			 * ethertype so that it ends up in ctx->llproto below.
			 */
			outer_llproto = veth->h_vlan_encapsulated_proto;
			llproto = veth->h_vlan_proto;
			hdrsize = sizeof(_veth);
			break;
		default:
			return -1;
		}

		ctx->inner_lloff = off;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_LL;
		off += hdrsize;
	} else {
		struct iphdr *iph;
		u32 _version;

		/* No link-layer header: only the first 32 bits are fetched,
		 * which is enough to read the IP version field.
		 */
		iph = skb_header_pointer(pkt->skb, off, sizeof(_version), &_version);
		if (!iph)
			return -1;

		switch (iph->version) {
		case 4:
			llproto = htons(ETH_P_IP);
			break;
		case 6:
			llproto = htons(ETH_P_IPV6);
			break;
		default:
			return -1;
		}
	}

	ctx->llproto = llproto;
	/* For VLAN frames, dispatch on the encapsulated protocol instead. */
	if (llproto == htons(ETH_P_8021Q))
		llproto = outer_llproto;

	nhoff = off;

	switch (llproto) {
	case htons(ETH_P_IP): {
		struct iphdr *iph, _iph;

		iph = skb_header_pointer(pkt->skb, nhoff, sizeof(_iph), &_iph);
		if (!iph)
			return -1;

		if (iph->ihl < 5 || iph->version != 4)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		thoff = nhoff + (iph->ihl * 4);
		/* The transport header is only recorded when the fragment
		 * offset is zero.
		 */
		if ((ntohs(iph->frag_off) & IP_OFFSET) == 0) {
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = iph->protocol;
		}
		}
		break;
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h, _ip6h;
		int fh_flags = IP6_FH_F_AUTH;
		unsigned short fragoff;
		int l4proto;

		ip6h = skb_header_pointer(pkt->skb, nhoff, sizeof(_ip6h), &_ip6h);
		if (!ip6h)
			return -1;

		if (ip6h->version != 6)
			return -1;

		ctx->inner_nhoff = nhoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_NH;

		thoff = nhoff;
		l4proto = ipv6_find_hdr(pkt->skb, &thoff, -1, &fragoff, &fh_flags);
		if (l4proto < 0 || thoff > U16_MAX)
			return -1;

		if (fragoff == 0) {
			/* NOTE(review): the offset computed by ipv6_find_hdr()
			 * is replaced by the end of the fixed IPv6 header
			 * here; confirm this is intended when extension
			 * headers are present.
			 */
			thoff = nhoff + sizeof(_ip6h);
			ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
			ctx->inner_thoff = thoff;
			ctx->l4proto = l4proto;
		}
		}
		break;
	default:
		return -1;
	}

	return 0;
}

/*
 * Record the tunnel header offset in @ctx and advance *@off past the tunnel
 * header.
 *
 * For GRE the tunnel offset is pkt->thoff and *@off is left untouched. For
 * UDP the tunnel header starts at *@off, which is then advanced by
 * priv->hdrsize and, for GENEVE, by the option length taken from the GENEVE
 * header. Any other outer transport protocol is rejected.
 *
 * Returns 0 on success, -1 on an unsupported protocol or unreadable header.
 */
static int nft_inner_parse_tunhdr(const struct nft_inner *priv,
				  const struct nft_pktinfo *pkt,
				  struct nft_inner_tun_ctx *ctx, u32 *off)
{
	if (pkt->tprot == IPPROTO_GRE) {
		ctx->inner_tunoff = pkt->thoff;
		ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
		return 0;
	}

	if (pkt->tprot != IPPROTO_UDP)
		return -1;

	ctx->inner_tunoff = *off;
	ctx->flags |= NFT_PAYLOAD_CTX_INNER_TUN;
	*off += priv->hdrsize;

	switch (priv->type) {
	case NFT_INNER_GENEVE: {
		struct genevehdr *gnvh, _gnvh;

		gnvh = skb_header_pointer(pkt->skb, pkt->inneroff,
					  sizeof(_gnvh), &_gnvh);
		if (!gnvh)
			return -1;

		/* opt_len is scaled by 4 to get the option size in bytes. */
		*off += gnvh->opt_len * 4;
		}
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Parse the encapsulated packet according to priv->flags and fill @tun_ctx.
 * On success the context is tagged with the tunnel type and with the skb
 * address as cookie, and NFT_PKTINFO_INNER_FULL is set in pkt->flags.
 *
 * Returns 0 on success, -1 if any parsing step fails.
 */
static int nft_inner_parse(const struct nft_inner *priv,
			   struct nft_pktinfo *pkt,
			   struct nft_inner_tun_ctx *tun_ctx)
{
	u32 off = pkt->inneroff;

	if (priv->flags & NFT_INNER_HDRSIZE &&
	    nft_inner_parse_tunhdr(priv, pkt, tun_ctx, &off) < 0)
		return -1;

	if (priv->flags & (NFT_INNER_LL | NFT_INNER_NH)) {
		if (nft_inner_parse_l2l3(priv, pkt, tun_ctx, off) < 0)
			return -1;
	} else if (priv->flags & NFT_INNER_TH) {
		tun_ctx->inner_thoff = off;
		tun_ctx->flags |= NFT_PAYLOAD_CTX_INNER_TH;
	}

	tun_ctx->type = priv->type;
	tun_ctx->cookie = (unsigned long)pkt->skb;
	pkt->flags |= NFT_PKTINFO_INNER_FULL;

	return 0;
}

/*
 * Copy this CPU's saved tunnel context into @tun_ctx if its cookie matches
 * pkt->skb.
 *
 * Returns true when the context was restored, false when the saved context
 * belongs to a different skb (in which case @tun_ctx is left untouched).
 */
static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
				      struct nft_inner_tun_ctx *tun_ctx)
{
	struct nft_inner_tun_ctx *this_cpu_tun_ctx;
	bool match;

	local_bh_disable();
	local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
	match = this_cpu_tun_ctx->cookie == (unsigned long)pkt->skb;
	if (match)
		*tun_ctx = *this_cpu_tun_ctx;
	/* Single release path: drop the nested-BH lock before re-enabling
	 * bottom halves, i.e. in reverse order of acquisition, on both the
	 * match and the mismatch outcome.
	 */
	local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	local_bh_enable();

	return match;
}

/*
 * Store @tun_ctx as this CPU's saved tunnel context, unless the saved
 * context already carries the same cookie.
 */
static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
				   const struct nft_inner_tun_ctx *tun_ctx)
{
	struct nft_inner_tun_ctx *this_cpu_tun_ctx;

	local_bh_disable();
	local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
	if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
		*this_cpu_tun_ctx = *tun_ctx;
	local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
	local_bh_enable();
}

/*
 * Decide whether the inner headers have to be (re)parsed.
 *
 * Parsing is needed when the packet has not been fully parsed yet, when no
 * saved context for this skb can be restored into @tun_ctx, or when the
 * restored context was produced for a different tunnel type.
 */
static bool nft_inner_parse_needed(const struct nft_inner *priv,
				   const struct nft_pktinfo *pkt,
				   struct nft_inner_tun_ctx *tun_ctx)
{
	if (!(pkt->flags & NFT_PKTINFO_INNER_FULL))
		return true;

	if (!nft_inner_restore_tun_ctx(pkt, tun_ctx))
		return true;

	if (priv->type != tun_ctx->type)
		return true;

	return false;
}

/*
 * Evaluate the embedded payload/meta expression against the inner packet.
 * The tunnel context is restored or parsed first and saved again after a
 * successful evaluation. On any failure the verdict is set to NFT_BREAK.
 */
static void nft_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
			   const struct nft_pktinfo *pkt)
{
	const struct nft_inner *priv = nft_expr_priv(expr);
	struct nft_inner_tun_ctx tun_ctx = {};

	if (nft_payload_inner_offset(pkt) < 0)
		goto err;

	if (nft_inner_parse_needed(priv, pkt, &tun_ctx) &&
	    nft_inner_parse(priv, (struct nft_pktinfo *)pkt, &tun_ctx) < 0)
		goto err;

	switch (priv->expr_type) {
	case NFT_INNER_EXPR_PAYLOAD:
		nft_payload_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	case NFT_INNER_EXPR_META:
		nft_meta_inner_eval((struct nft_expr *)&priv->expr, regs, pkt, &tun_ctx);
		break;
	default:
		WARN_ON_ONCE(1);
		goto err;
	}
	nft_inner_save_tun_ctx(pkt, &tun_ctx);
	return;
err:
	regs->verdict.code = NFT_BREAK;
}

static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = {
	[NFTA_INNER_NUM]	= { .type = NLA_U32 },
	[NFTA_INNER_FLAGS]	= { .type = NLA_U32 },
	[NFTA_INNER_HDRSIZE]	= { .type = NLA_U32 },
	[NFTA_INNER_TYPE]	= { .type = NLA_U32 },
	[NFTA_INNER_EXPR]	= { .type = NLA_NESTED },
};

/* Result of parsing the nested NFTA_INNER_EXPR attribute. */
struct nft_expr_info {
	const struct nft_expr_ops	*ops;
	const struct nlattr		*attr;
	struct nlattr			*tb[NFT_EXPR_MAXATTR + 1];
};

/*
 * Initialise the "inner" expression from netlink attributes.
 *
 * All of NFTA_INNER_FLAGS, _NUM, _HDRSIZE, _TYPE and _EXPR are mandatory.
 * Returns 0 on success; -EINVAL on missing attributes, a type above U8_MAX
 * or an embedded expression other than "payload"/"meta"; -EOPNOTSUPP on
 * unknown flags, a non-zero NFTA_INNER_NUM or (when NFT_INNER_HDRSIZE is
 * set) a header size outside 1..64; otherwise the error returned by parsing
 * or initialising the embedded expression.
 */
static int nft_inner_init(const struct nft_ctx *ctx,
			  const struct nft_expr *expr,
			  const struct nlattr * const tb[])
{
	struct nft_inner *priv = nft_expr_priv(expr);
	u32 flags, hdrsize, type, num;
	struct nft_expr_info expr_info;
	int err;

	if (!tb[NFTA_INNER_FLAGS] ||
	    !tb[NFTA_INNER_NUM] ||
	    !tb[NFTA_INNER_HDRSIZE] ||
	    !tb[NFTA_INNER_TYPE] ||
	    !tb[NFTA_INNER_EXPR])
		return -EINVAL;

	flags = ntohl(nla_get_be32(tb[NFTA_INNER_FLAGS]));
	if (flags & ~NFT_INNER_MASK)
		return -EOPNOTSUPP;

	num = ntohl(nla_get_be32(tb[NFTA_INNER_NUM]));
	if (num != 0)
		return -EOPNOTSUPP;

	hdrsize = ntohl(nla_get_be32(tb[NFTA_INNER_HDRSIZE]));
	type = ntohl(nla_get_be32(tb[NFTA_INNER_TYPE]));

	if (type > U8_MAX)
		return -EINVAL;

	if (flags & NFT_INNER_HDRSIZE) {
		if (hdrsize == 0 || hdrsize > 64)
			return -EOPNOTSUPP;
	}

	priv->flags = flags;
	priv->hdrsize = hdrsize;
	priv->type = type;

	err = nft_expr_inner_parse(ctx, tb[NFTA_INNER_EXPR], &expr_info);
	if (err < 0)
		return err;

	priv->expr.ops = expr_info.ops;

	if (!strcmp(expr_info.ops->type->name, "payload"))
		priv->expr_type = NFT_INNER_EXPR_PAYLOAD;
	else if (!strcmp(expr_info.ops->type->name, "meta"))
		priv->expr_type = NFT_INNER_EXPR_META;
	else
		return -EINVAL;

	err = expr_info.ops->init(ctx, (struct nft_expr *)&priv->expr,
				  (const struct nlattr * const*)expr_info.tb);
	if (err < 0)
		return err;

	return 0;
}

/*
 * Dump the expression configuration, including the embedded expression, to
 * @skb. Returns 0 on success, -1 if any attribute could not be added.
 */
static int nft_inner_dump(struct sk_buff *skb,
			  const struct nft_expr *expr, bool reset)
{
	const struct nft_inner *priv = nft_expr_priv(expr);

	if (nla_put_be32(skb, NFTA_INNER_NUM, htonl(0)) ||
	    nla_put_be32(skb, NFTA_INNER_TYPE, htonl(priv->type)) ||
	    nla_put_be32(skb, NFTA_INNER_FLAGS, htonl(priv->flags)) ||
	    nla_put_be32(skb, NFTA_INNER_HDRSIZE, htonl(priv->hdrsize)))
		goto nla_put_failure;

	if (nft_expr_dump(skb, NFTA_INNER_EXPR,
			  (struct nft_expr *)&priv->expr, reset) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -1;
}

static const struct nft_expr_ops nft_inner_ops = {
	.type		= &nft_inner_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_inner)),
	.eval		= nft_inner_eval,
	.init		= nft_inner_init,
	.dump		= nft_inner_dump,
};

struct nft_expr_type nft_inner_type __read_mostly = {
	.name		= "inner",
	.ops		= &nft_inner_ops,
	.policy		= nft_inner_policy,
	.maxattr	= NFTA_INNER_MAX,
	.owner		= THIS_MODULE,
};
91 97 97 69 91 1 1 66 92 97 79 79 1 1 1 1 1 1 1 69 70 70 70 65 68 68 92 80 80 80 80 80 80 80 92 92 92 91 92 92 92 92 92 92 92 92 92 91 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 68 69 69 69 68 69 69 103 92 92 92 91 1 1 92 92 91 65 66 92 92 2 91 92 69 69 3 136 2 136 136 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 
1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "bbpos.h" #include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" #include "errcode.h" #include "error.h" #include "journal.h" #include "trace.h" #include <linux/prefetch.h> #include <linux/sched/mm.h> #include <linux/swap.h> const char * const bch2_btree_node_flags[] = { "typebit", "typebit", "typebit", #define x(f) [BTREE_NODE_##f] = #f, BTREE_FLAGS() #undef x NULL }; void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned reserve = 16; if (!c->btree_roots_known[0].b) reserve += 8; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->b) reserve += min_t(unsigned, 1, r->b->c.level) * 8; } c->btree_cache.nr_reserve = reserve; } static inline size_t btree_cache_can_free(struct btree_cache_list *list) { struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); size_t can_free = list->nr; if (!list->idx) can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve); return can_free; } static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); 
if (b->c.lock.readers) list_add(&b->list, &bc->freed_pcpu); else list_add(&b->list, &bc->freed_nonpcpu); } static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(!b->data); bc->nr_freeable++; list_add(&b->list, &bc->freeable); } void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); __bch2_btree_node_to_freelist(bc, b); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } void __btree_node_data_free(struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); /* * This should really be done in slub/vmalloc, but we're using the * kmalloc_large() path, so we're working around a slub bug by doing * this here: */ if (b->data) mm_account_reclaimed_pages(btree_buf_bytes(b) / PAGE_SIZE); if (b->aux_data) mm_account_reclaimed_pages(btree_aux_data_bytes(b) / PAGE_SIZE); EBUG_ON(btree_node_write_in_flight(b)); clear_btree_node_just_written(b); kvfree(b->data); b->data = NULL; #ifdef __KERNEL__ kvfree(b->aux_data); #else munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; } static void btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(list_empty(&b->list)); list_del_init(&b->list); __btree_node_data_free(b); --bc->nr_freeable; btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { const struct btree *b = obj; const u64 *v = arg->key; return b->hash_val == *v ? 
0 : 1; } static const struct rhashtable_params bch_btree_cache_params = { .head_offset = offsetof(struct btree, hash), .key_offset = offsetof(struct btree, hash_val), .key_len = sizeof(u64), .obj_cmpfn = bch2_btree_cache_cmp_fn, .automatic_shrinking = true, }; static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { BUG_ON(b->data || b->aux_data); gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE; b->data = kvmalloc(btree_buf_bytes(b), gfp); if (!b->data) return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); #ifdef __KERNEL__ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); #else b->aux_data = mmap(NULL, btree_aux_data_bytes(b), PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (b->aux_data == MAP_FAILED) b->aux_data = NULL; #endif if (!b->aux_data) { kvfree(b->data); b->data = NULL; return bch_err_throw(c, ENOMEM_btree_node_mem_alloc); } return 0; } static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) { struct btree *b; b = kzalloc(sizeof(struct btree), gfp); if (!b) return NULL; bkey_btree_ptr_init(&b->key); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); b->byte_order = ilog2(c->opts.btree_node_size); return b; } struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; if (btree_node_data_alloc(c, b, GFP_KERNEL)) { kfree(b); return NULL; } bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); return b; } static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b) { struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); u64 mask = bc->pinned_nodes_mask[!!b->c.level]; return ((mask & BIT_ULL(b->c.btree_id)) && bbpos_cmp(bc->pinned_nodes_start, pos) < 0 && bbpos_cmp(bc->pinned_nodes_end, pos) >= 0); } void bch2_node_pin(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); 
list_move(&b->list, &bc->live[1].list); bc->live[0].nr--; bc->live[1].nr++; } mutex_unlock(&bc->lock); }

/*
 * Undo all pinning: zero both pinned_nodes_mask words, then move every node
 * on the pinned list (live[1]) to the unpinned list (live[0]), clearing its
 * pinned flag and adjusting both list counters.  Done under bc->lock.
 */
void bch2_btree_cache_unpin(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *n;

	mutex_lock(&bc->lock);
	c->btree_cache.pinned_nodes_mask[0] = 0;
	c->btree_cache.pinned_nodes_mask[1] = 0;

	/* _safe iteration: each node is unlinked from the list being walked */
	list_for_each_entry_safe(b, n, &bc->live[1].list, list) {
		clear_btree_node_pinned(b);
		list_move(&b->list, &bc->live[0].list);
		bc->live[0].nr++;
		bc->live[1].nr--;
	}

	mutex_unlock(&bc->lock);
}

/* Btree in memory cache - hash table */

/*
 * Remove @b from the hash table and from whichever live list it is on,
 * updating the per-btree and per-list counters.  Caller must hold bc->lock.
 * Removal from the rhashtable is not allowed to fail (BUG_ON).
 */
void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	lockdep_assert_held(&bc->lock);

	int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
	BUG_ON(ret);

	/* Cause future lookups for this node to fail: */
	b->hash_val = 0;

	/* nr_by_btree[] is only maintained for ids below BTREE_ID_NR */
	if (b->c.btree_id < BTREE_ID_NR)
		--bc->nr_by_btree[b->c.btree_id];
	/* the pinned flag selects which live list's count to drop */
	--bc->live[btree_node_pinned(b)].nr;
	list_del_init(&b->list);
}

/*
 * Unhash @b and then hand it to __bch2_btree_node_to_freelist().  The first
 * call asserts that bc->lock is held, so callers must hold it.
 */
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
	__bch2_btree_node_hash_remove(bc, b);
	__bch2_btree_node_to_freelist(bc, b);
}

/*
 * Insert @b into the hash table keyed on the hash of b->key and add it to
 * the tail of the pinned or unpinned live list.  @b must not be on any list
 * and must not already be hashed.
 *
 * Returns 0, or the error from rhashtable_lookup_insert_fast().  On error
 * b->hash_val has already been set and is not reset here.
 */
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
	BUG_ON(!list_empty(&b->list));
	BUG_ON(b->hash_val);

	b->hash_val = btree_ptr_hash_val(&b->key);
	int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
						bch_btree_cache_params);
	if (ret)
		return ret;

	if (b->c.btree_id < BTREE_ID_NR)
		bc->nr_by_btree[b->c.btree_id]++;

	/* Recompute pinned state and mirror it into the node flag */
	bool p = __btree_node_pinned(bc, b);
	mod_bit(BTREE_NODE_pinned, &b->flags, p);

	list_add_tail(&b->list, &bc->live[p].list);
	bc->live[p].nr++;
	return 0;
}

/*
 * Set @b's level and btree id, then hash it under bc->lock.
 * Returns the result of __bch2_btree_node_hash_insert().
 */
int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
				unsigned level, enum btree_id id)
{
	b->c.level	= level;
	b->c.btree_id	= id;

	mutex_lock(&bc->lock);
	int ret = __bch2_btree_node_hash_insert(bc, b);
	mutex_unlock(&bc->lock);

	return ret;
}

/*
 * Re-key a cached node: look up the node currently hashed under @old
 * (without reading it in if it is not cached) and, if found, rehash it under
 * @new.  The re-insert is not allowed to fail (BUG_ON).  Nothing happens if
 * the lookup returns NULL or an error.
 */
void bch2_btree_node_update_key_early(struct btree_trans *trans,
				      enum btree_id btree, unsigned level,
				      struct bkey_s_c old, struct bkey_i *new)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	struct bkey_buf tmp;
	int ret;

	/* bch2_btree_node_get_noiter() takes a bkey_i; build one from @old */
	bch2_bkey_buf_init(&tmp);
	bch2_bkey_buf_reassemble(&tmp, c, old);

	/* nofill == true: only consult the cache */
	b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
	if (!IS_ERR_OR_NULL(b)) {
		mutex_lock(&c->btree_cache.lock);

		__bch2_btree_node_hash_remove(&c->btree_cache, b);

		bkey_copy(&b->key, new);
		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
		BUG_ON(ret);

		mutex_unlock(&c->btree_cache.lock);
		/* drop the read lock the lookup returned with */
		six_unlock_read(&b->c.lock);
	}

	bch2_bkey_buf_exit(&tmp, c);
}

/* Look up the cached node whose hash value matches that of key @k. */
__flatten
static inline struct btree *btree_cache_find(struct btree_cache *bc,
					     const struct bkey_i *k)
{
	u64 v = btree_ptr_hash_val(k);

	return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params);
}

/*
 * Decide whether @b may be reclaimed right now.  Caller holds bc->lock.
 *
 * @flush:  if false, a dirty node or one with IO in flight is refused; if
 *          true, a dirty node is written out (only when @locked) and
 *          in-flight IO is waited on (only when !@locked).
 * @locked: __btree_node_reclaim() passes true once it has taken the node's
 *          intent and write locks, false for the check made before that.
 *
 * Each refusal bumps the matching bc->not_freed[] counter.
 *
 * Returns 0 if the node may be reclaimed, an ENOMEM_btree_node_reclaim error
 * if not, or -EINTR when @flush && @locked and IO is in flight.
 */
static int __btree_node_reclaim_checks(struct bch_fs *c, struct btree *b,
				       bool flush, bool locked)
{
	struct btree_cache *bc = &c->btree_cache;

	lockdep_assert_held(&bc->lock);

	if (btree_node_noevict(b)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_noevict]++;
		return bch_err_throw(c, ENOMEM_btree_node_reclaim);
	}
	if (btree_node_write_blocked(b)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_blocked]++;
		return bch_err_throw(c, ENOMEM_btree_node_reclaim);
	}
	if (btree_node_will_make_reachable(b)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_will_make_reachable]++;
		return bch_err_throw(c, ENOMEM_btree_node_reclaim);
	}

	if (btree_node_dirty(b)) {
		if (!flush) {
			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_dirty]++;
			return bch_err_throw(c, ENOMEM_btree_node_reclaim);
		}

		if (locked) {
			/*
			 * Using the underscore version because we don't want to compact
			 * bsets after the write, since this node is about to be evicted
			 * - unless btree verify mode is enabled, since it runs out of
			 * the post write cleanup:
			 */
			if (static_branch_unlikely(&bch2_verify_btree_ondisk))
				bch2_btree_node_write(c, b, SIX_LOCK_intent,
						      BTREE_WRITE_cache_reclaim);
			else
				__bch2_btree_node_write(c, b,
							BTREE_WRITE_cache_reclaim);
		}
	}

	/* Either kind of IO in flight (this includes a write started above) */
	if (b->flags & ((1U << BTREE_NODE_read_in_flight)|
			(1U << BTREE_NODE_write_in_flight))) {
		if (!flush) {
			if (btree_node_read_in_flight(b))
				bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_read_in_flight]++;
			else if (btree_node_write_in_flight(b))
				bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_write_in_flight]++;
			return bch_err_throw(c, ENOMEM_btree_node_reclaim);
		}

		/* Don't wait here when @locked; the caller unlocks and retries */
		if (locked)
			return -EINTR;

		/* XXX: waiting on IO with btree cache lock held */
		bch2_btree_node_wait_on_read(b);
		bch2_btree_node_wait_on_write(b);
	}

	return 0;
}

/*
 * this version is for btree nodes that have already been freed (we're not
 * reaping a real btree node)
 *
 * Try to take exclusive ownership of @b so its memory can be reused: run the
 * reclaim checks, trylock intent then write, and run the checks again under
 * the locks.  If the locked recheck returns -EINTR, both locks are dropped
 * and the whole sequence restarts.
 *
 * Returns 0 with @b's intent and write locks held (the caller unlocks), or
 * an error with no locks held.  Caller holds bc->lock.
 */
static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
{
	struct btree_cache *bc = &c->btree_cache;
	int ret = 0;

	lockdep_assert_held(&bc->lock);
retry_unlocked:
	ret = __btree_node_reclaim_checks(c, b, flush, false);
	if (ret)
		return ret;

	if (!six_trylock_intent(&b->c.lock)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_intent]++;
		return bch_err_throw(c, ENOMEM_btree_node_reclaim);
	}

	if (!six_trylock_write(&b->c.lock)) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_lock_write]++;
		six_unlock_intent(&b->c.lock);
		return bch_err_throw(c, ENOMEM_btree_node_reclaim);
	}

	/* recheck under lock */
	ret = __btree_node_reclaim_checks(c, b, flush, true);
	if (ret) {
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		if (ret == -EINTR)
			goto retry_unlocked;
		return ret;
	}

	/*
	 * Only nodes that are still hashed are counted as reaped.
	 * NOTE(review): ret is always 0 here, so "&& !ret" is redundant.
	 */
	if (b->hash_val && !ret)
		trace_and_count(c, btree_cache_reap, c, b);
	return 0;
}

/* Reclaim without flushing: dirty nodes and nodes with IO in flight are refused. */
static int btree_node_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, false);
}

/* Reclaim with flushing: dirty nodes are written out and IO is waited on. */
static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b)
{
	return __btree_node_reclaim(c, b, true);
}

/*
 * Shrinker ->scan_objects callback, shared by both live lists (see
 * bch2_fs_btree_cache_init()).  shrink->private_data is the
 * btree_cache_list being shrunk; the owning btree_cache and bch_fs are
 * recovered from it with container_of().
 *
 * Frees up to min(sc->nr_to_scan, btree_cache_can_free(list)) nodes, first
 * from bc->freeable and then from the live list, and returns how many were
 * freed (SHRINK_STOP if the shrinker is disabled).
 */
static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
					   struct shrink_control *sc)
{
	struct btree_cache_list *list = shrink->private_data;
	struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]);
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);
	struct btree *b, *t;
	unsigned long nr = sc->nr_to_scan;
	unsigned long can_free = 0;
	unsigned long freed = 0;
	unsigned long touched = 0;
	unsigned i, flags;
	unsigned long ret = SHRINK_STOP;
	/* Start writing dirty nodes once dirty + requested reaches 3/4 of the list */
	bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4;

	if (static_branch_unlikely(&bch2_btree_shrinker_disabled))
		return SHRINK_STOP;

	mutex_lock(&bc->lock);
	flags = memalloc_nofs_save();

	/*
	 * It's _really_ critical that we don't free too many btree nodes - we
	 * have to always leave ourselves a reserve. The reserve is how we
	 * guarantee that allocating memory for a new btree node can always
	 * succeed, so that inserting keys into the btree can always succeed and
	 * IO can always make forward progress:
	 */
	can_free = btree_cache_can_free(list);
	if (nr > can_free) {
		bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_cache_reserve] += nr - can_free;
		nr = can_free;
	}

	i = 0;
	list_for_each_entry_safe(b, t, &bc->freeable, list) {
		/*
		 * Leave a few nodes on the freeable list, so that a btree split
		 * won't have to hit the system allocator:
		 */
		if (++i <= 3)
			continue;

		touched++;

		if (touched >= nr)
			goto out;

		if (!btree_node_reclaim(c, b)) {
			btree_node_data_free(bc, b);
			/* reclaim succeeded with both locks held; drop them */
			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);
			freed++;
			bc->nr_freed++;
		}
	}
restart:
	list_for_each_entry_safe(b, t, &list->list, list) {
		touched++;

		if (btree_node_accessed(b)) {
			/*
			 * Recently accessed: clear the bit and skip the node
			 * this time; it does not count towards 'touched'.
			 * NOTE(review): the second ';' below is a stray empty
			 * statement.
			 */
			clear_btree_node_accessed(b);
			bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
			--touched;;
		} else if (!btree_node_reclaim(c, b)) {
			__bch2_btree_node_hash_remove(bc, b);
			__btree_node_data_free(b);
			btree_node_to_freedlist(bc, b);

			freed++;
			bc->nr_freed++;

			six_unlock_write(&b->c.lock);
			six_unlock_intent(&b->c.lock);

			if (freed == nr)
				goto out_rotate;
		} else if (trigger_writes &&
			   btree_node_dirty(b) &&
			   !btree_node_will_make_reachable(b) &&
			   !btree_node_write_blocked(b) &&
			   six_trylock_read(&b->c.lock)) {
			/*
			 * Could not reclaim, but the node is dirty and
			 * writable: re-insert the list head right after b (so
			 * the walk resumes past it on restart), drop bc->lock
			 * and start the write.
			 */
			list_move(&list->list, &b->list);
			mutex_unlock(&bc->lock);
			__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
			six_unlock_read(&b->c.lock);
			/* bc->lock is not held here, hence the _nounlock exit */
			if (touched >= nr)
				goto out_nounlock;
			mutex_lock(&bc->lock);
			goto restart;
		}

		if (touched >= nr)
			break;
	}
out_rotate:
	/*
	 * Unless the walk ran off the end, re-insert the list head just before
	 * t, the next unvisited node, so that t becomes the first entry.
	 */
	if (&t->list != &list->list)
		list_move_tail(&list->list, &t->list);
out:
	mutex_unlock(&bc->lock);
out_nounlock:
	ret = freed;
	memalloc_nofs_restore(flags);
	trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret);
	return ret;
}

/*
 * Shrinker ->count_objects callback: report how many nodes on this list may
 * be freed, or 0 if the shrinker is disabled.
 */
static unsigned long bch2_btree_cache_count(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	struct btree_cache_list *list = shrink->private_data;

	if (static_branch_unlikely(&bch2_btree_shrinker_disabled))
		return 0;

	return btree_cache_can_free(list);
}

/*
 * Tear down the btree node cache: free both shrinkers, unhash every live
 * node, free the data of every freeable node, free every struct btree on the
 * freed lists, assert that all counters are zero, and destroy the hash table
 * if it was initialized.
 */
void bch2_fs_btree_cache_exit(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *t;
	unsigned long flags;

	shrinker_free(bc->live[1].shrink);
	shrinker_free(bc->live[0].shrink);

	/* vfree() can allocate memory: */
	flags = memalloc_nofs_save();
	mutex_lock(&bc->lock);

	/* Put the verify node on live[0] so the loops below handle it too */
	if (c->verify_data)
		list_move(&c->verify_data->list, &bc->live[0].list);

	kvfree(c->verify_ondisk);

	/* Likewise for each btree root node */
	for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
		struct btree_root *r = bch2_btree_id_root(c, i);

		if (r->b)
			list_add(&r->b->list, &bc->live[0].list);
	}

	list_for_each_entry_safe(b, t, &bc->live[1].list, list)
		bch2_btree_node_hash_remove(bc, b);
	list_for_each_entry_safe(b, t, &bc->live[0].list, list)
		bch2_btree_node_hash_remove(bc, b);

	list_for_each_entry_safe(b, t, &bc->freeable, list) {
		BUG_ON(btree_node_read_in_flight(b) ||
		       btree_node_write_in_flight(b));

		btree_node_data_free(bc, b);
		cond_resched();
	}

	/* Dirty nodes at this point are only tolerated after a journal error */
	BUG_ON(!bch2_journal_error(&c->journal) &&
	       atomic_long_read(&c->btree_cache.nr_dirty));

	/* Merge the two freed lists so one loop frees every struct btree */
	list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu);

	list_for_each_entry_safe(b, t, &bc->freed_nonpcpu, list) {
		list_del(&b->list);
		six_lock_exit(&b->c.lock);
		kfree(b);
	}

	mutex_unlock(&bc->lock);
	memalloc_nofs_restore(flags);

	for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++)
		BUG_ON(bc->nr_by_btree[i]);
	BUG_ON(bc->live[0].nr);
	BUG_ON(bc->live[1].nr);
	BUG_ON(bc->nr_freeable);

	if (bc->table_init_done)
		rhashtable_destroy(&bc->table);
}

/*
 * Set up the btree node cache: initialize the hash table, preallocate
 * bc->nr_reserve nodes, and allocate and register one shrinker per live list
 * (seeks = 2 for the unpinned list, 8 for the pinned list).
 *
 * Returns 0 on success.  Every failure, including one from
 * rhashtable_init(), is reported as ENOMEM_fs_btree_cache_init.  Nothing is
 * torn down on the error path here; presumably the caller follows up with
 * bch2_fs_btree_cache_exit() - confirm.
 */
int bch2_fs_btree_cache_init(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct shrinker *shrink;
	unsigned i;
	int ret = 0;

	ret = rhashtable_init(&bc->table, &bch_btree_cache_params);
	if (ret)
		goto err;

	/* Lets bch2_fs_btree_cache_exit() know the table needs destroying */
	bc->table_init_done = true;

	bch2_recalc_btree_reserve(c);

	for (i = 0; i < bc->nr_reserve; i++) {
		struct btree *b = __bch2_btree_node_mem_alloc(c);
		if (!b)
			goto err;
		__bch2_btree_node_to_freelist(bc, b);
	}

	/* Move anything that is on live[0] at this point onto the freeable list */
	list_splice_init(&bc->live[0].list, &bc->freeable);

	mutex_init(&c->verify_lock);

	shrink = shrinker_alloc(0, "%s-btree_cache", c->name);
	if (!shrink)
		goto err;
	bc->live[0].shrink	= shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->seeks		= 2;
	shrink->private_data	= &bc->live[0];
	shrinker_register(shrink);

	shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name);
	if (!shrink)
		goto err;
	bc->live[1].shrink	= shrink;
	shrink->count_objects	= bch2_btree_cache_count;
	shrink->scan_objects	= bch2_btree_cache_scan;
	shrink->seeks		= 8;
	shrink->private_data	= &bc->live[1];
	shrinker_register(shrink);

	return 0;
err:
	return bch_err_throw(c, ENOMEM_fs_btree_cache_init);
}

/*
 * Initialization that cannot fail: the cache mutex, each live list's head
 * and its own index (used by the shrinker's container_of()), and the
 * freeable and freed list heads.
 */
void bch2_fs_btree_cache_init_early(struct btree_cache *bc)
{
	mutex_init(&bc->lock);
	for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) {
		bc->live[i].idx = i;
		INIT_LIST_HEAD(&bc->live[i].list);
	}
	INIT_LIST_HEAD(&bc->freeable);
	INIT_LIST_HEAD(&bc->freed_pcpu);
	INIT_LIST_HEAD(&bc->freed_nonpcpu);
}

/*
 * We can only have one thread cannibalizing other cached btree nodes at a time,
 * or we'll deadlock. We use an open coded mutex to ensure that, which a
 * cannibalize_bucket() will take. This means every time we unlock the root of
 * the btree, we need to release this lock if we have it held.
 */

/*
 * Release the cannibalize lock if the current task holds it, and wake any
 * waiters.  Does nothing otherwise.
 */
void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;

	if (bc->alloc_lock == current) {
		trace_and_count(c, btree_cache_cannibalize_unlock, trans);
		bc->alloc_lock = NULL;
		closure_wake_up(&bc->alloc_wait);
	}
}

/*
 * Try to take the cannibalize lock; bc->alloc_lock holds the owning task.
 *
 * Returns 0 if the lock was taken or the current task already holds it.
 * Otherwise, with @cl == NULL, fails with ENOMEM_btree_cache_cannibalize_lock;
 * with @cl set, adds @cl to the wait list, retries once, and if still
 * unavailable fails with btree_cache_cannibalize_lock_blocked.
 */
int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct task_struct *old;

	/* On cmpxchg failure 'old' holds the current owner: catches re-entry */
	old = NULL;
	if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current)
		goto success;

	if (!cl) {
		trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
		return bch_err_throw(c, ENOMEM_btree_cache_cannibalize_lock);
	}

	closure_wait(&bc->alloc_wait, cl);

	/* Try again, after adding ourselves to waitlist */
	old = NULL;
	if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current) {
		/* We raced */
		closure_wake_up(&bc->alloc_wait);
		goto success;
	}

	trace_and_count(c, btree_cache_cannibalize_lock_fail, trans);
	return bch_err_throw(c, btree_cache_cannibalize_lock_blocked);

success:
	trace_and_count(c, btree_cache_cannibalize_lock, trans);
	return 0;
}

/*
 * Take a node from the live lists for reuse.  A first pass over both lists,
 * from the tail, only accepts nodes that can be reclaimed without flushing;
 * after that the lists are walked repeatedly with flushing allowed until a
 * node is obtained.  Never returns NULL.  The returned node has its intent
 * and write locks held, as left by __btree_node_reclaim().
 */
static struct btree *btree_node_cannibalize(struct bch_fs *c)
{
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
		list_for_each_entry_reverse(b, &bc->live[i].list, list)
			if (!btree_node_reclaim(c, b))
				return b;

	while (1) {
		for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++)
			list_for_each_entry_reverse(b, &bc->live[i].list, list)
				if (!btree_node_write_and_reclaim(c, b))
					return b;

		/*
		 * Rare case: all nodes were intent-locked.
		 * Just busy-wait.
		 */
		WARN_ONCE(1, "btree cache cannibalize failed\n");
		cond_resched();
	}
}

/*
 * Allocate an in-memory btree node: a struct btree plus its data buffers.
 *
 * The struct btree comes from the matching freed list (per-cpu or non
 * per-cpu read locks, chosen by @pcpu_read_locks) or is newly allocated.
 * The buffers come from a node on bc->freeable or are newly allocated.
 * Allocations are first tried with GFP_NOWAIT and, failing that, retried
 * with GFP_KERNEL after dropping the transaction's locks.  If memory still
 * cannot be had and the current task holds the cannibalize lock, another
 * cached node is cannibalized.
 *
 * Returns the node with its intent and write locks held, or an ERR_PTR:
 * -BCH_ERR_ENOMEM_btree_node_mem_alloc, or the error from
 * bch2_trans_relock(), in which case the node is put back on the freelist.
 */
struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct list_head *freed = pcpu_read_locks
		? &bc->freed_pcpu
		: &bc->freed_nonpcpu;
	struct btree *b, *b2;
	u64 start_time = local_clock();

	mutex_lock(&bc->lock);

	/*
	 * We never free struct btree itself, just the memory that holds the on
	 * disk node. Check the freed list before allocating a new one:
	 */
	list_for_each_entry(b, freed, list)
		if (!btree_node_reclaim(c, b)) {
			list_del_init(&b->list);
			goto got_node;
		}

	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
	if (b) {
		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
	} else {
		/* Drop bc->lock and the transaction's locks before GFP_KERNEL */
		mutex_unlock(&bc->lock);
		bch2_trans_unlock(trans);
		b = __btree_node_mem_alloc(c, GFP_KERNEL);
		if (!b)
			goto err;
		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
		mutex_lock(&bc->lock);
	}

	/* Newly allocated node: the trylocks are required to succeed */
	BUG_ON(!six_trylock_intent(&b->c.lock));
	BUG_ON(!six_trylock_write(&b->c.lock));

got_node:
	/*
	 * btree_free() doesn't free memory; it sticks the node on the end of
	 * the list. Check if there's any freed nodes there:
	 */
	list_for_each_entry(b2, &bc->freeable, list)
		if (!btree_node_reclaim(c, b2)) {
			/* Take b2's buffers; b2 itself goes to the freed list */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);

			list_del_init(&b2->list);
			--bc->nr_freeable;
			btree_node_to_freedlist(bc, b2);
			mutex_unlock(&bc->lock);

			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
			goto got_mem;
		}

	mutex_unlock(&bc->lock);

	if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
		bch2_trans_unlock(trans);
		if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
			goto err;
	}

got_mem:
	BUG_ON(!list_empty(&b->list));
	BUG_ON(btree_node_hashed(b));
	BUG_ON(btree_node_dirty(b));
	BUG_ON(btree_node_write_in_flight(b));
out:
	/* Reset per-node state before handing the node out */
	b->flags		= 0;
	b->written		= 0;
	b->nsets		= 0;
	b->sib_u64s[0]		= 0;
	b->sib_u64s[1]		= 0;
	b->whiteout_u64s	= 0;
	bch2_btree_keys_init(b);

	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
			       start_time);

	/* The transaction may have been unlocked on the slow paths above */
	int ret = bch2_trans_relock(trans);
	if (unlikely(ret)) {
		bch2_btree_node_to_freelist(c, b);
		return ERR_PTR(ret);
	}

	return b;
err:
	/* Reached with bc->lock not held; b may be NULL */
	mutex_lock(&bc->lock);

	/* Try to cannibalize another cached btree node: */
	if (bc->alloc_lock == current) {
		b2 = btree_node_cannibalize(c);
		clear_btree_node_just_written(b2);
		__bch2_btree_node_hash_remove(bc, b2);

		if (b) {
			/* Keep our struct btree, take only b2's buffers */
			swap(b->data, b2->data);
			swap(b->aux_data, b2->aux_data);
			btree_node_to_freedlist(bc, b2);
			six_unlock_write(&b2->c.lock);
			six_unlock_intent(&b2->c.lock);
		} else {
			/* No struct btree of our own: use b2 as it is */
			b = b2;
		}

		BUG_ON(!list_empty(&b->list));
		mutex_unlock(&bc->lock);

		trace_and_count(c, btree_cache_cannibalize, trans);
		goto out;
	}

	mutex_unlock(&bc->lock);
	return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc);
}

/*
 * Slowpath, don't want it inlined into btree_iter_traverse()
 *
 * Allocate a cache node for key @k, hash it and start reading it in.
 *
 * @path may be NULL.  With a path, the parent (level + 1) must be
 * relockable, the transaction's locks are dropped around the read, and the
 * node is relocked with @lock_type afterwards.  Without a path the read is
 * issued with the node's intent lock still held, downgraded to a read lock
 * if @lock_type is SIX_LOCK_read.
 *
 * Returns:
 *  - the node, locked as described above;
 *  - NULL if another thread hashed the same node first, if @path is set and
 *    @sync is false, or if the node could not be relocked after the read;
 *  - an ERR_PTR for a topology error (bad level or key), a transaction
 *    restart, or an allocation failure.
 */
static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
				struct btree_path *path,
				const struct bkey_i *k,
				enum btree_id btree_id,
				unsigned level,
				enum six_lock_type lock_type,
				bool sync)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	if (unlikely(level >= BTREE_MAX_DEPTH)) {
		int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
						 level, BTREE_MAX_DEPTH);
		return ERR_PTR(ret);
	}

	if (unlikely(!bkey_is_btree_ptr(&k->k))) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));

		int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}

	/*
	 * Parent node must be locked, else we could read in a btree node that's
	 * been freed:
	 */
	if (path && !bch2_btree_node_relock(trans, path, level + 1)) {
		trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
	}

	/* Interior nodes (level != 0) are allocated with pcpu read locks */
	b = bch2_btree_node_mem_alloc(trans, level != 0);

	if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
		/* Without a path the ENOMEM goes straight back to the caller */
		if (!path)
			return b;

		trans->memory_allocation_failure = true;
		trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
	}

	if (IS_ERR(b))
		return b;

	bkey_copy(&b->key, k);
	if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
		/* raced with another fill: */

		/* mark as unhashed... */
		b->hash_val = 0;

		mutex_lock(&bc->lock);
		__bch2_btree_node_to_freelist(bc, b);
		mutex_unlock(&bc->lock);

		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		return NULL;
	}

	set_btree_node_read_in_flight(b);
	six_unlock_write(&b->c.lock);

	if (path) {
		/* Sequence number used to relock the node after the read */
		u32 seq = six_lock_seq(&b->c.lock);

		/* Unlock before doing IO: */
		six_unlock_intent(&b->c.lock);
		bch2_trans_unlock(trans);

		bch2_btree_node_read(trans, b, sync);

		int ret = bch2_trans_relock(trans);
		if (ret)
			return ERR_PTR(ret);

		/* For an async read there is no node to hand back */
		if (!sync)
			return NULL;

		if (!six_relock_type(&b->c.lock, lock_type, seq))
			b = NULL;
	} else {
		bch2_btree_node_read(trans, b, sync);
		if (lock_type == SIX_LOCK_read)
			six_lock_downgrade(&b->c.lock);
	}

	return b;
}

/*
 * Report a topology error describing a node whose on-disk header disagrees
 * with the key that points to it.  Does nothing while recovery has not yet
 * completed the check_allocations pass.
 */
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
	struct printbuf buf = PRINTBUF;

	if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
		return;

	prt_printf(&buf,
		   "btree node header doesn't match ptr: ");
	bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
	prt_str(&buf, "\nptr: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));

	prt_str(&buf, "\nheader: ");
	bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data));
	prt_str(&buf, "\nmin ");
	bch2_bpos_to_text(&buf, b->data->min_key);

	prt_printf(&buf, "\nmax ");
	bch2_bpos_to_text(&buf, b->data->max_key);

	bch2_fs_topology_error(c, "%s", buf.buf);

	printbuf_exit(&buf);
}

/*
 * Check that the node's header agrees with the in-memory node and its key:
 * btree id, level, max_key and, for btree_ptr_v2 keys, min_key.  Any
 * mismatch is reported through btree_bad_header().
 */
static inline void btree_check_header(struct bch_fs *c, struct btree *b)
{
	if (b->c.btree_id != BTREE_NODE_ID(b->data) ||
	    b->c.level != BTREE_NODE_LEVEL(b->data) ||
	    !bpos_eq(b->data->max_key, b->key.k.p) ||
	    (b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
	     !bpos_eq(b->data->min_key,
		      bkey_i_to_btree_ptr_v2(&b->key)->v.min_key)))
		btree_bad_header(c, b);
}

/*
 * Slow path of bch2_btree_node_get(): look the node up in the hash table,
 * reading it in from disk if it is not cached, and return it locked with
 * @lock_type.
 *
 * Returns the locked node or an ERR_PTR (a transaction restart,
 * -BCH_ERR_btree_node_read_err_cached, or an error from the fill path).
 */
static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
					   const struct bkey_i *k, unsigned level,
					   enum six_lock_type lock_type,
					   unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	bool need_relock = false;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		/*
		 * We must have the parent locked to call bch2_btree_node_fill(),
		 * else we could read in a btree node from disk that's been
		 * freed:
		 */
		b = bch2_btree_node_fill(trans, path, k, path->btree_id,
					 level, lock_type, true);
		/* the fill path drops the transaction's locks around the read */
		need_relock = true;

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		if (IS_ERR(b))
			return b;
	} else {
		/* Drop a read lock on the parent before locking the child */
		if (btree_node_read_locked(path, level + 1))
			btree_node_unlock(trans, path, level + 1);

		ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		BUG_ON(ret);

		/* The node may have been reused for another key while we waited */
		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.level != level ||
			     race_fault())) {
			six_unlock_type(&b->c.lock, lock_type);
			if (bch2_btree_node_relock(trans, path, level + 1))
				goto retry;

			trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
			return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
		}

		/* avoid atomic set bit if it's not needed: */
		if (!btree_node_accessed(b))
			set_btree_node_accessed(b);
	}

	if (unlikely(btree_node_read_in_flight(b))) {
		u32 seq = six_lock_seq(&b->c.lock);

		/* Drop all locks before waiting for the read to finish */
		six_unlock_type(&b->c.lock, lock_type);
		bch2_trans_unlock(trans);
		need_relock = true;

		bch2_btree_node_wait_on_read(b);

		ret = bch2_trans_relock(trans);
		if (ret)
			return ERR_PTR(ret);

		/*
		 * should_be_locked is not set on this path yet, so we need to
		 * relock it specifically:
		 */
		if (!six_relock_type(&b->c.lock, lock_type, seq))
			goto retry;
	}

	if (unlikely(need_relock)) {
		ret = bch2_trans_relock(trans) ?:
			bch2_btree_path_relock_intent(trans, path);
		if (ret) {
			six_unlock_type(&b->c.lock, lock_type);
			return ERR_PTR(ret);
		}
	}

	prefetch(b->aux_data);

	/* Prefetch the first three cache lines of each bset's aux data */
	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}

/**
 * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
 * in from disk if necessary.
 *
 * @trans:	btree transaction object
 * @path:	btree_path being traversed
 * @k:		pointer to btree node (generally KEY_TYPE_btree_ptr_v2)
 * @level:	level of btree node being looked up (0 == leaf node)
 * @lock_type:	SIX_LOCK_read or SIX_LOCK_intent
 * @trace_ip:	ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek())
 *
 * The btree node will have either a read or an intent lock held, depending on
 * the @lock_type parameter.
 *
 * Returns: btree node or ERR_PTR()
 */
struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path,
				  const struct bkey_i *k, unsigned level,
				  enum six_lock_type lock_type,
				  unsigned long trace_ip)
{
	struct bch_fs *c = trans->c;
	struct btree *b;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	/* Fast path: use the in-memory node pointer derived from the key */
	b = btree_node_mem_ptr(k);

	/*
	 * Check b->hash_val _before_ calling btree_node_lock() - this might not
	 * be the node we want anymore, and trying to lock the wrong node could
	 * cause an unnecessary transaction restart:
	 */
	if (unlikely(!c->opts.btree_node_mem_ptr_optimization ||
		     !b ||
		     b->hash_val != btree_ptr_hash_val(k)))
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

	if (btree_node_read_locked(path, level + 1))
		btree_node_unlock(trans, path, level + 1);

	ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		return ERR_PTR(ret);

	BUG_ON(ret);

	/* Recheck now that the node is locked */
	if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
		     b->c.level != level ||
		     race_fault())) {
		six_unlock_type(&b->c.lock, lock_type);
		if (bch2_btree_node_relock(trans, path, level + 1))
			return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);

		trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path);
		return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
	}

	/* A read in flight is left to the slow path, which waits for it */
	if (unlikely(btree_node_read_in_flight(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip);
	}

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	/* avoid atomic set bit if it's not needed: */
	if (!btree_node_accessed(b))
		set_btree_node_accessed(b);

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_type(&b->c.lock, lock_type);
		return ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
	}

	EBUG_ON(b->c.btree_id != path->btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);

	return b;
}

/*
 * Get a btree node without a btree_path, returning it read-locked.
 *
 * @nofill: if true, only the cache is consulted and NULL is returned when
 *          the node is not cached; if false, a missing node is read in.
 *
 * If the fill fails, the cannibalize lock is tried (non-blocking) and, when
 * that succeeds, the lookup is retried.  The cannibalize lock is released
 * at 'out'.
 *
 * Returns the read-locked node, NULL (only when @nofill), or an ERR_PTR.
 *
 * NOTE(review): the transaction-restart return after btree_node_lock_nopath()
 * does not go through 'out', so it skips the cannibalize unlock - confirm
 * that is intended.
 */
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
					 const struct bkey_i *k,
					 enum btree_id btree_id,
					 unsigned level,
					 bool nofill)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;
	int ret;

	EBUG_ON(level >= BTREE_MAX_DEPTH);

	if (c->opts.btree_node_mem_ptr_optimization) {
		b = btree_node_mem_ptr(k);
		if (b)
			goto lock_node;
	}
retry:
	b = btree_cache_find(bc, k);
	if (unlikely(!b)) {
		if (nofill)
			goto out;

		b = bch2_btree_node_fill(trans, NULL, k, btree_id,
					 level, SIX_LOCK_read, true);

		/* We raced and found the btree node in the cache */
		if (!b)
			goto retry;

		/* Fill failed: retry if we can take the cannibalize lock */
		if (IS_ERR(b) &&
		    !bch2_btree_cache_cannibalize_lock(trans, NULL))
			goto retry;

		if (IS_ERR(b))
			goto out;
	} else {
lock_node:
		ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_);
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			return ERR_PTR(ret);

		BUG_ON(ret);

		/* Node was reused for something else: look it up again */
		if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
			     b->c.btree_id != btree_id ||
			     b->c.level != level)) {
			six_unlock_read(&b->c.lock);
			goto retry;
		}

		/* avoid atomic set bit if it's not needed: */
		if (!btree_node_accessed(b))
			set_btree_node_accessed(b);
	}

	/* XXX: waiting on IO with btree locks held: */
	__bch2_btree_node_wait_on_read(b);

	prefetch(b->aux_data);

	for_each_bset(b, t) {
		void *p = (u64 *) b->aux_data + t->aux_data_offset;

		prefetch(p + L1_CACHE_BYTES * 0);
		prefetch(p + L1_CACHE_BYTES * 1);
		prefetch(p + L1_CACHE_BYTES * 2);
	}

	if (unlikely(btree_node_read_error(b))) {
		six_unlock_read(&b->c.lock);
		b = ERR_PTR(-BCH_ERR_btree_node_read_err_cached);
		goto out;
	}

	EBUG_ON(b->c.btree_id != btree_id);
	EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
	btree_check_header(c, b);
out:
	bch2_btree_cache_cannibalize_unlock(trans);
	return b;
}

/*
 * Start reading a btree node into the cache if it is not already there,
 * using an asynchronous fill (sync == false).  @path may be NULL; if given,
 * the parent (level + 1) must be locked.
 *
 * Returns 0 if the node was already cached or the fill did not fail,
 * otherwise the error from bch2_btree_node_fill().
 */
int bch2_btree_node_prefetch(struct btree_trans *trans,
			     struct btree_path *path,
			     const struct bkey_i *k,
			     enum btree_id btree_id, unsigned level)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;

	BUG_ON(path && !btree_node_locked(path, level + 1));
	BUG_ON(level >= BTREE_MAX_DEPTH);

	struct btree *b = btree_cache_find(bc, k);
	if (b)
		return 0;

	b = bch2_btree_node_fill(trans, path, k, btree_id,
				 level, SIX_LOCK_read, false);
	int ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		return ret;
	/* A node handed back by the fill is read-locked; release it */
	if (b)
		six_unlock_read(&b->c.lock);
	return 0;
}

/*
 * Evict the node for key @k from the cache, if it is cached.  Waits for IO,
 * takes the node's intent and write locks, writes the node out first if it
 * is dirty (then starts over), and finally unhashes it and frees its data.
 * The node must not be a btree root.
 */
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
	struct bch_fs *c = trans->c;
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b;

	b = btree_cache_find(bc, k);
	if (!b)
		return;

	BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
	/* not allowed to wait on io with btree locks held: */

	/*
	 * XXX we're called from btree_gc which will be holding other btree
	 * nodes locked
	 */
	__bch2_btree_node_wait_on_read(b);
	__bch2_btree_node_wait_on_write(b);

	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
	btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
	/* The node was reused for another key in the meantime: leave it alone */
	if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
		goto out;

	if (btree_node_dirty(b)) {
		/* Write it out, drop the locks, and wait for the write */
		__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
		six_unlock_write(&b->c.lock);
		six_unlock_intent(&b->c.lock);
		goto wait_on_io;
	}

	BUG_ON(btree_node_dirty(b));

	mutex_lock(&bc->lock);
	bch2_btree_node_hash_remove(bc, b);
	btree_node_data_free(bc, b);
	mutex_unlock(&bc->lock);
out:
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
}

/* Name of btree @btree, or "(unknown)" for ids not below BTREE_ID_NR. */
const char *bch2_btree_id_str(enum btree_id btree)
{
	return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)";
}

/*
 * Print the name of btree @btree; ids not below BTREE_ID_NR are printed as
 * "(unknown btree N)".
 */
void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree)
{
	if (btree < BTREE_ID_NR)
		prt_str(out, __bch2_btree_ids[btree]);
	else
		prt_printf(out, "(unknown btree %u)", btree);
}

/* Print "btree=<name> level=<level>". */
void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level)
{
	prt_str(out, "btree=");
	bch2_btree_id_to_text(out, btree);
	prt_printf(out, " level=%u", level);
}

/*
 * Print a node position: the btree name, "level <level>/<root level>" (or
 * "(unknown)" when the btree has no root entry), a newline, and then key @k.
 */
void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
			      enum btree_id btree, unsigned level, struct bkey_s_c k)
{
	bch2_btree_id_to_text(out, btree);
	prt_printf(out, " level %u/", level);
	struct btree_root *r = bch2_btree_id_root(c, btree);
	if (r)
		prt_printf(out, "%u", r->level);
	else
		prt_printf(out, "(unknown)");
	prt_newline(out);

	bch2_bkey_val_to_text(out, c, k);
}

/* Print the position of node @b, using its btree id, level and key. */
void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
	__bch2_btree_pos_to_text(out, c, b->c.btree_id, b->c.level, bkey_i_to_s_c(&b->key));
}

/*
 * Print a multi-line summary of node @b: level, key range, pointers, key
 * format, space used, sibling u64s and merge threshold, packed and unpacked
 * key counts, and the bset statistics gathered by bch2_btree_keys_stats().
 */
void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b)
{
	struct bset_stats stats;

	memset(&stats, 0, sizeof(stats));

	bch2_btree_keys_stats(b, &stats);

	prt_printf(out, "l %u ", b->c.level);
	bch2_bpos_to_text(out, b->data->min_key);
	prt_printf(out, " - ");
	bch2_bpos_to_text(out, b->data->max_key);
	prt_printf(out, ":\n"
	       " ptrs: ");
	bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key));
	prt_newline(out);

	prt_printf(out,
	       " format: ");
	bch2_bkey_format_to_text(out, &b->format);

	prt_printf(out,
	       " unpack fn len: %u\n"
	       " bytes used %zu/%zu (%zu%% full)\n"
	       " sib u64s: %u, %u (merge threshold %u)\n"
	       " nr packed keys %u\n"
	       " nr unpacked keys %u\n"
	       " floats %zu\n"
	       " failed unpacked %zu\n",
	       b->unpack_fn_len,
	       b->nr.live_u64s * sizeof(u64),
	       btree_buf_bytes(b) - sizeof(struct btree_node),
	       b->nr.live_u64s * 100 / btree_max_u64s(c),
	       b->sib_u64s[0],
	       b->sib_u64s[1],
	       c->btree_foreground_merge_threshold,
	       b->nr.packed_keys,
	       b->nr.unpacked_keys,
	       stats.floats,
	       stats.failed);
}

/*
 * Print one statistics line: @label, a tab, @nr nodes expressed as a
 * human-readable size (nr * btree_node_size), then the raw count in
 * parentheses.
 */
static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c,
				 const char *label, size_t nr)
{
	prt_printf(out, "%s\t", label);
	prt_human_readable_u64(out, nr * c->opts.btree_node_size);
	prt_printf(out, " (%zu)\n", nr);
}

/*
 * Names of the bc->not_freed[] counters, generated by stringifying each
 * entry of BCH_BTREE_CACHE_NOT_FREED_REASONS(); NULL-terminated.
 */
static const char * const bch2_btree_cache_not_freed_reasons_strs[] = {
#define x(n) #n,
	BCH_BTREE_CACHE_NOT_FREED_REASONS()
#undef x
	NULL
};

/*
 * Print the btree cache statistics: list sizes, reserve, dirty count,
 * whether the cannibalize lock is held, per-btree node counts, and the
 * freed / not-freed counters.  Pushes a tabstop at 32 if @out has none.
 */
void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc)
{
	struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache);

	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	prt_btree_cache_line(out, c, "live:",		bc->live[0].nr);
	prt_btree_cache_line(out, c, "pinned:",		bc->live[1].nr);
	prt_btree_cache_line(out, c, "reserve:",	bc->nr_reserve);
	/* note: the "freed:" line here reports nr_freeable */
	prt_btree_cache_line(out, c, "freed:",		bc->nr_freeable);
	prt_btree_cache_line(out, c, "dirty:",		atomic_long_read(&bc->nr_dirty));
	prt_printf(out, "cannibalize lock:\t%s\n",	bc->alloc_lock ? "held" : "not held");
	prt_newline(out);

	for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) {
		bch2_btree_id_to_text(out, i);
		prt_printf(out, "\t");
		prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size);
		prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]);
	}

	prt_newline(out);
	prt_printf(out, "counters since mount:\n");
	prt_printf(out, "freed:\t%zu\n", bc->nr_freed);
	prt_printf(out, "not freed:\n");

	for (unsigned i = 0; i < ARRAY_SIZE(bc->not_freed); i++)
		prt_printf(out, " %s\t%llu\n",
			   bch2_btree_cache_not_freed_reasons_strs[i], bc->not_freed[i]);
}
249 2321 2299 2208 1759 1762 83 301 2299 2023 25 61 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* include/asm-generic/tlb.h * * Generic TLB shootdown code * * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * * Copyright 2011 Red Hat, Inc., Peter Zijlstra */ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H #include <linux/mmu_notifier.h> #include <linux/swap.h> #include <linux/hugetlb_inline.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> /* * Blindly accessing user memory from NMI context can be dangerous * if we're in the middle of switching the current user task or switching * the loaded mm. */ #ifndef nmi_uaccess_okay # define nmi_uaccess_okay() true #endif #ifdef CONFIG_MMU /* * Generic MMU-gather implementation. * * The mmu_gather data structure is used by the mm code to implement the * correct and efficient ordering of freeing pages and TLB invalidations. 
* * This correct ordering is: * * 1) unhook page * 2) TLB invalidate page * 3) free page * * That is, we must never free a page before we have ensured there are no live * translations left to it. Otherwise it might be possible to observe (or * worse, change) the page content after it has been reused. * * The mmu_gather API consists of: * * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu() * * start and finish a mmu_gather * * Finish in particular will issue a (final) TLB invalidate and free * all (remaining) queued pages. * * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA * * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there are large holes between the VMAs. * * - tlb_free_vmas() * * tlb_free_vmas() marks the start of unlinking of one or more vmas * and freeing page-tables. * * - tlb_remove_table() * * tlb_remove_table() is the basic primitive to free page-table directories * (__p*_free_tlb()). In its most primitive form it is an alias for * tlb_remove_page() below, for when page directories are pages and have no * additional constraints. * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * * - tlb_remove_page() / tlb_remove_page_size() * - __tlb_remove_folio_pages() / __tlb_remove_page_size() * - __tlb_remove_folio_pages_size() * * __tlb_remove_folio_pages_size() is the basic primitive that queues pages * for freeing. It will return a boolean indicating if the queue is (now) * full and a call to tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and have no return value. * * __tlb_remove_folio_pages() is similar to __tlb_remove_page_size(), * however, instead of removing a single page, assume PAGE_SIZE and remove * the given number of consecutive pages that are all part of the * same (large) folio. 
* * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. * * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees * whatever pages are still batched. * * - mmu_gather::fullmm * * A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free * the entire mm; this allows a number of optimizations. * * - We can ignore tlb_{start,end}_vma(); because we don't * care about ranges. Everything will be shot down. * * - (RISC) architectures that use ASIDs can cycle to a new ASID * and delay the invalidation until ASID space runs out. * * - mmu_gather::need_flush_all * * A flag that can be set by the arch code if it wants to force * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: * * - mmu_gather::start / mmu_gather::end * * which provides the range that needs to be flushed to cover the pages to * be freed. * * - mmu_gather::freed_tables * * set when we freed page table pages * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * * MMU_GATHER_PAGE_SIZE * * This ensures we call tlb_flush() every time tlb_change_page_size() actually * changes the size and provides mmu_gather::page_size to tlb_flush(). 
* * This might be useful if your architecture has size specific TLB * invalidation instructions. * * MMU_GATHER_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() * for page directories (__p*_free_tlb()). * * Useful if your architecture has non-page page directories. * * When used, an architecture is expected to provide __tlb_remove_table() or * use the generic __tlb_remove_table(), which does the actual freeing of these * pages. * * MMU_GATHER_RCU_TABLE_FREE * * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see * comment below). * * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * * MMU_GATHER_NO_FLUSH_CACHE * * Indicates the architecture has flush_cache_range() but it needs *NOT* be called * before unmapping a VMA. * * NOTE: strictly speaking we shouldn't have this knob and instead rely on * flush_cache_range() being a NOP, except Sparc64 seems to be * different here. * * MMU_GATHER_MERGE_VMAS * * Indicates the architecture wants to merge ranges over VMAs; typical when * multiple range invalidates are more expensive than a full invalidate. * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). This * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * * If the option is set the mmu_gather will not track individual pages for * delayed page free anymore. A platform that enables the option needs to * provide its own implementation of the __tlb_remove_page_size() function to * free pages. * * This is useful if your architecture already flushes TLB entries in the * various ptep_get_and_clear() functions.
*/ #ifdef CONFIG_MMU_GATHER_TABLE_FREE /* Batch of page-table pointers; tables[] is a flexible array member and nr is presumably the number of entries in use (TODO confirm against mm/mmu_gather.c). */ struct mmu_table_batch { #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; #endif unsigned int nr; void *tables[]; }; /* Number of table pointers that fit in one page after the batch header. */ #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) #ifndef __HAVE_ARCH_TLB_REMOVE_TABLE /* Generic table free: treats @table as a struct ptdesc and hands it to pagetable_dtor_free(). */ static inline void __tlb_remove_table(void *table) { struct ptdesc *ptdesc = (struct ptdesc *)table; pagetable_dtor_free(ptdesc); } #endif extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #else /* !CONFIG_MMU_GATHER_TABLE_FREE */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page); /* * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based * page directories and we can use the normal page batching to free them. */ static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct ptdesc *ptdesc = (struct ptdesc *)table; pagetable_dtor(ptdesc); tlb_remove_page(tlb, ptdesc_page(ptdesc)); } #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for * hardware to skip the TLBI when freeing page tables. */ #ifndef tlb_needs_table_invalidate #define tlb_needs_table_invalidate() (true) #endif void tlb_remove_table_sync_one(void); #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif /* No-op stub used when MMU_GATHER_RCU_TABLE_FREE is not enabled. */ static inline void tlb_remove_table_sync_one(void) { } #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ #ifndef CONFIG_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure.
*/ #define MMU_GATHER_BUNDLE 8 /* One node of a singly linked list of batches (via ->next); encoded_pages[] is a flexible array member. nr and max presumably hold the used and maximum entry counts (TODO confirm against mm/mmu_gather.c). */ struct mmu_gather_batch { struct mmu_gather_batch *next; unsigned int nr; unsigned int max; struct encoded_page *encoded_pages[]; }; /* Number of page pointers that fit in one page after the batch header. */ #define MAX_GATHER_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *)) /* * Limit the maximum number of mmu_gather batches to reduce a risk of soft * lockups for non-preemptible kernels on huge machines when a lot of memory * is zapped during unmapping. * 10K pages freed at once should be safe even without a preemption point. */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, bool delay_rmap, int page_size); bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page, unsigned int nr_pages, bool delay_rmap); #ifdef CONFIG_SMP /* * This both sets 'delayed_rmap', and returns true. It would be an inline * function, except we define it before the 'struct mmu_gather'. */ #define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true) extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma); #endif /* CONFIG_SMP */ #endif /* !CONFIG_MMU_GATHER_NO_GATHER */ /* * We have a no-op version of the rmap removal that doesn't * delay anything. That is used on S390, which flushes remote * TLBs synchronously, and on UP, which doesn't have any * remote TLBs to flush and is not preemptible due to this * all happening under the page table lock. */ #ifndef tlb_delay_rmap #define tlb_delay_rmap(tlb) (false) static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif /* * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page.
*/ struct mmu_gather { struct mm_struct *mm; #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif unsigned long start; unsigned long end; /* * we are in the middle of an operation to clear * a full mm and can make some optimizations */ unsigned int fullmm : 1; /* * we have performed an operation which * requires a complete flush of the tlb */ unsigned int need_flush_all : 1; /* * we have removed page directories */ unsigned int freed_tables : 1; /* * Do we have pending delayed rmap removals? */ unsigned int delayed_rmap : 1; /* * at which levels have we cleared entries? */ unsigned int cleared_ptes : 1; unsigned int cleared_pmds : 1; unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; /* * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; unsigned int vma_pfn : 1; unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; #ifdef CONFIG_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif #endif }; void tlb_flush_mmu(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, unsigned int range_size) { tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + range_size); } static inline void __tlb_reset_range(struct mmu_gather *tlb) { if (tlb->fullmm) { tlb->start = tlb->end = ~0; } else { tlb->start = TASK_SIZE; tlb->end = 0; } tlb->freed_tables = 0; tlb->cleared_ptes = 0; tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an * intermediate flush. 
*/ } #ifdef CONFIG_MMU_GATHER_NO_RANGE #if defined(tlb_flush) #error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* * When an architecture does not have efficient means of range flushing TLBs * there is no point in doing intermediate flushes on tlb_end_vma() to keep the * range small. We equally don't have to worry about page granularity or other * things. * * All we need to do is issue a full flush for any !0 range. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->end) flush_tlb_mm(tlb->mm); } #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation * use that. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { flush_tlb_mm(tlb->mm); } else if (tlb->end) { struct vm_area_struct vma = { .vm_mm = tlb->mm, .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | (tlb->vma_huge ? VM_HUGETLB : 0), }; flush_tlb_range(&vma, tlb->start, tlb->end); } } #endif #endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { /* * flush_tlb_range() implementations that look at VM_HUGETLB (tile, * mips-4k) flush only large pages. * * flush_tlb_range() implementations that flush I-TLB also flush D-TLB * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing * range. * * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); /* * Track if there's at least one VM_PFNMAP/VM_MIXEDMAP vma * in the tracked range, see tlb_free_vmas(). */ tlb->vma_pfn |= !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* * Anything calling __tlb_adjust_range() also sets at least one of * these bits. 
*/ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || tlb->cleared_puds || tlb->cleared_p4ds)) return; tlb_flush(tlb); __tlb_reset_range(tlb); } static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { if (__tlb_remove_page_size(tlb, page, false, page_size)) tlb_flush_mmu(tlb); } static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { tlb_remove_table(tlb, pt); } static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { #ifdef CONFIG_MMU_GATHER_PAGE_SIZE if (tlb->page_size && tlb->page_size != page_size) { if (!tlb->fullmm && !tlb->need_flush_all) tlb_flush_mmu(tlb); } tlb->page_size = page_size; #endif } static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { if (tlb->cleared_ptes) return PAGE_SHIFT; if (tlb->cleared_pmds) return PMD_SHIFT; if (tlb->cleared_puds) return PUD_SHIFT; if (tlb->cleared_p4ds) return P4D_SHIFT; return PAGE_SHIFT; } static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) { return 1UL << tlb_get_unmap_shift(tlb); } /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. 
*/ /* Unless this is a fullmm gather: record the VMA's flags and, unless the architecture opted out via MMU_GATHER_NO_FLUSH_CACHE, flush the cache for the VMA's range. */ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); #ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); #endif } static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm || IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) return; /* * Do a TLB flush and reset the range at VMA boundaries; this avoids * the ranges growing with the unused space between consecutive VMAs, * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on * this. */ tlb_flush_mmu_tlbonly(tlb); } static inline void tlb_free_vmas(struct mmu_gather *tlb) { if (tlb->fullmm) return; /* * VM_PFNMAP is more fragile because the core mm will not track the * page mapcount -- there might not be page-frames for these PFNs * after all. * * Specifically, there is a race between munmap() and * unmap_mapping_range(), where munmap() will unlink the VMA, such * that unmap_mapping_range() will no longer observe the VMA and * no-op, without observing the TLBI, returning prematurely. * * So if we're about to unlink such a VMA, and we have pending * TLBI for such a vma, flush things now. */ if (tlb->vma_pfn) tlb_flush_mmu_tlbonly(tlb); } /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, * and set corresponding cleared_*.
*/ static inline void tlb_flush_pte_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_ptes = 1; } static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_pmds = 1; } static inline void tlb_flush_pud_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_puds = 1; } static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_p4ds = 1; } #ifndef __tlb_remove_tlb_entry /* Default per-entry hook: an empty function unless the architecture defines its own __tlb_remove_tlb_entry. */ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { } #endif /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * * Record the fact that pte's were really unmapped by updating the range, * so we can later optimise away the tlb invalidate. This helps when * userspace is unmapping already-unmapped pages, which happens quite a lot. */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ tlb_flush_pte_range(tlb, address, PAGE_SIZE); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for * later tlb invalidation. * * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple * consecutive ptes instead of only a single one.
*/ static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb, pte_t *ptep, unsigned int nr, unsigned long address) { tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr); /* nr must be at least 1: the hook runs before nr is tested, and an unsigned nr of 0 would wrap around on --nr. */ for (;;) { __tlb_remove_tlb_entry(tlb, ptep, address); if (--nr == 0) break; ptep++; address += PAGE_SIZE; } } /* Record a huge-page unmap: the range helper is chosen by comparing the huge page size against P4D_SIZE, PUD_SIZE and PMD_SIZE, largest first, and the per-entry hook is then called once. */ #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ do { \ unsigned long _sz = huge_page_size(h); \ if (_sz >= P4D_SIZE) \ tlb_flush_p4d_range(tlb, address, _sz); \ else if (_sz >= PUD_SIZE) \ tlb_flush_pud_range(tlb, address, _sz); \ else if (_sz >= PMD_SIZE) \ tlb_flush_pmd_range(tlb, address, _sz); \ else \ tlb_flush_pte_range(tlb, address, _sz); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation * This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pmd_tlb_entry #define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0) #endif #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) /** * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb * invalidation. This is a nop so far, because only x86 needs it. */ #ifndef __tlb_remove_pud_tlb_entry #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0) #endif #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) /* * For things like page tables caches (ie caching addresses "inside" the * page tables, like x86 does), for legacy reasons, flushing an * individual page had better flush the page table caches behind it. This * is definitely how x86 works, for example.
And if you have an * architected non-legacy page table cache (which I'm not aware of * anybody actually doing), you're going to have some architecturally * explicit flushing for that, likely *separate* from a regular TLB entry * flush, and thus you'd need more than just some range expansion.. * * So if we ever find an architecture * that would want something that odd, I think it is up to that * architecture to do its own odd thing, not cause pain for others * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com * * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ /* Each default p*_free_tlb() widens the range by PAGE_SIZE, sets freed_tables and calls the matching __p*_free_tlb(); the pte/pmd/pud variants go through the range helper of the level above, which also sets that level's cleared_* bit. */ #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif /* p4d variant: calls __tlb_adjust_range() directly, so no cleared_* bit is set here and only freed_tables marks the pending flush. NOTE(review): the second macro parameter is named pudp but is passed straight to __p4d_free_tlb() - presumably a naming leftover. */ #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif /* Defaults used when the architecture provides no override: always report that a flush is needed. */ #ifndef pte_needs_flush static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) { return true; } #endif #ifndef huge_pmd_needs_flush static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) { return true; } #endif #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */
23 18 22 23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 /* * linux/fs/nls/nls_ascii.c * * Charset ascii translation tables. * Generated automatically from the Unicode and charset * tables from the Unicode Organization (www.unicode.org). * The Unicode to charset table has only exact mappings. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/nls.h> #include <linux/errno.h> /* Byte -> Unicode table. Only entries 0x00-0x7f are listed; the remaining entries of the 256-element array are zero-initialized. */ static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, /* 0x10*/ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, /* 0x20*/ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, /* 0x30*/ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, /* 0x40*/ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, /* 0x50*/ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, /* 0x60*/ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, /* 0x70*/ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 
0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, }; static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ }; static const unsigned char *const page_uni2charset[256] = { page00, }; static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */ 0x78, 0x79, 
0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ }; static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */ }; /* Convert one Unicode value to its charset byte. Returns -ENAMETOOLONG if boundlen <= 0, -EINVAL if the high byte has no page in page_uni2charset or the mapped byte is 0, otherwise stores the byte in out[0] and returns 1. */ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; if (boundlen <= 0) return -ENAMETOOLONG; uni2charset = page_uni2charset[ch]; if (uni2charset && uni2charset[cl]) out[0] = uni2charset[cl]; else return -EINVAL; return 1; } /* Convert the byte at *rawstring to Unicode through charset2uni. *uni is always written; returns -EINVAL when the table entry is 0x0000, otherwise 1. boundlen is not examined here. */ static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { *uni = charset2uni[*rawstring]; if (*uni == 0x0000) return -EINVAL; return 1; } static struct nls_table table = { .charset = "ascii", .uni2char = uni2char, .char2uni = char2uni, 
.charset2lower = charset2lower, .charset2upper = charset2upper, }; static int __init init_nls_ascii(void) { return register_nls(&table); } static void __exit exit_nls_ascii(void) { unregister_nls(&table); } module_init(init_nls_ascii) module_exit(exit_nls_ascii) MODULE_DESCRIPTION("NLS ASCII (United States)"); MODULE_LICENSE("Dual BSD/GPL");
7 198 198 198 76 213 215 17 5 5 16 204 216 37 1 1 37 213 215 149 99 21 83 1 198 198 2 95 92 6 19 1 1 6 18 181 181 180 8 168 189 131 2 197 169 169 13 30 131 65 9 65 180 2 1 1 1 164 145 164 40 40 40 40 39 40 32 19 19 19 39 39 40 40 20 2 20 2 185 184 184 185 37 105 180 2 21 21 179 165 156 17 165 158 16 158 16 164 148 40 164 25 183 165 93 105 164 18 2 164 2 45 96 33 32 33 89 1 92 12 72 43 25 21 21 21 21 41 9 18 5 15 6 20 26 18 24 5 16 12 10 9 12 5 13 12 10 9 12 18 18 25 3 18 8 24 3 5 16 16 16 16 16 16 16 16 16 15 16 16 16 16 4 16 6 6 6 5 5 4 8 8 45 46 1 40 1 41 18 33 4 4 2 4 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 
1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 
2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 
2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 
2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ /* * jfs_txnmgr.c: transaction manager * * notes: * transaction starts with txBegin() and ends with txCommit() * or txAbort(). * * tlock is acquired at the time of update; * (obviate scan at commit time for xtree and dtree) * tlock and mp points to each other; * (no hashlist for mp -> tlock). * * special cases: * tlock on in-memory inode: * in-place tlock in the in-memory inode itself; * converted to page lock by iWrite() at commit time. * * tlock during write()/mmap() under anonymous transaction (tid = 0): * transferred (?) to transaction at commit time. 
 *
 * use the page itself to update allocation maps
 * (obviate intermediate replication of allocation/deallocation data)
 * hold on to mp+lock thru update of maps
 */

#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/freezer.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 *	transaction management structures
 *
 * TxAnchor is the single, file-wide anchor for the free lists of
 * transaction blocks (tblock) and transaction locks (tlock), together
 * with the wait queues that tasks sleep on when either resource runs out.
 * Index 0 is never handed out for either table, so a value of 0 in
 * freetid/freelock means "free list is empty".
 */
static struct {
	int freetid;		/* index of the first free tblock (0 = none) */
	int freelock;		/* index of the first free tlock (0 = none) */
	wait_queue_head_t freewait;	/* waiters for a free tblock */
	wait_queue_head_t freelockwait;	/* waiters for a free tlock */
	wait_queue_head_t lowlockwait;	/* waiters for tlock usage to drop */
	int tlocksInUse;	/* number of tlocks currently allocated */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct list_head unlock_queue;	/* Txns waiting to be released */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;

/*
 * Set to 1 by txLockAlloc() when tlock usage exceeds the high water mark
 * and cleared by txLockFree() once usage falls below the low water mark.
 */
int jfs_tlocks_low;		/* Indicates low number of available tlocks */

#ifdef CONFIG_JFS_STATISTICS
/*
 * Event counters, only compiled in with CONFIG_JFS_STATISTICS.  Each field
 * is bumped via INCREMENT() at the correspondingly named point in
 * txBegin(), txBeginAnon() and txLockAlloc().
 */
static struct {
	uint txBegin;
	uint txBegin_barrier;
	uint txBegin_lockslow;
	uint txBegin_freetid;
	uint txBeginAnon;
	uint txBeginAnon_barrier;
	uint txBeginAnon_lockslow;
	uint txLockAlloc;
	uint txLockAlloc_freelock;
} TxStat;
#endif

/*
 * Module parameters.  -1 means "unset"; txInit() then derives a default
 * and clamps both values to their supported ranges.
 */
static int nTxBlock = -1;	/* number of transaction blocks */
module_param(nTxBlock, int, 0);
MODULE_PARM_DESC(nTxBlock,
		 "Number of transaction blocks (max:65536)");

static int nTxLock = -1;	/* number of transaction locks */
module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock,
		 "Number of transaction locks (max:65536)");
struct tblock *TxBlock;	/* transaction block table */
static int TxLockLWM;	/* Low water mark for number of txLocks used */
static int TxLockHWM;	/* High water mark for number of txLocks used */
static int TxLockVHWM;	/* Very High water mark */
struct tlock *TxLock;	/* transaction lock table */

/*
 *	transaction management lock
 *
 * jfsTxnLock is the spinlock taken by TXN_LOCK()/TXN_UNLOCK() around
 * manipulation of the tblock/tlock free lists and lock ownership.
 */
static DEFINE_SPINLOCK(jfsTxnLock);

#define TXN_LOCK()		spin_lock(&jfsTxnLock)
#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)

/* IRQ-safe lock protecting the lazy-commit queues anchored in TxAnchor */
#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock)
#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)

static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
static int jfs_commit_thread_waking;

/*
 * Retry logic exists outside these macros to protect from spurious wakeups.
 *
 * TXN_SLEEP_DROP_LOCK() must be entered with TXN_LOCK held.  It queues the
 * current task on @event, marks it TASK_UNINTERRUPTIBLE, releases TXN_LOCK
 * and sleeps via io_schedule().  It returns with TXN_LOCK *not* held;
 * callers that need the lock again use TXN_SLEEP(), which re-takes it.
 */
static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
{
	DECLARE_WAITQUEUE(wait, current);

	/* enqueue and set state before dropping the lock so a wakeup
	 * issued right after TXN_UNLOCK() is not lost
	 */
	add_wait_queue(event, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	TXN_UNLOCK();
	io_schedule();
	remove_wait_queue(event, &wait);
}

/* Sleep on @event (TXN_LOCK held on entry) and re-acquire TXN_LOCK on wakeup */
#define TXN_SLEEP(event)\
{\
	TXN_SLEEP_DROP_LOCK(event);\
	TXN_LOCK();\
}

/* Wake every task sleeping on @event; each waiter re-checks its condition */
#define TXN_WAKEUP(event) wake_up_all(event)

/*
 *	statistics
 */
static struct {
	tid_t maxtid;		/* 4: biggest tid ever used */
	lid_t maxlid;		/* 4: biggest lid ever used */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait */
} stattx;

/*
 *	forward references
 */
static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
		struct tlock *tlck, struct commit *cd);
static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
		struct tlock *tlck);
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
		struct tblock * tblk);
static void txForce(struct tblock * tblk); static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd); static void txUpdateMap(struct tblock * tblk); static void txRelease(struct tblock * tblk); static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void LogSyncRelease(struct metapage * mp); /* * transaction block/lock management * --------------------------------- */ /* * Get a transaction lock from the free list. If the number in use is * greater than the high water mark, wake up the sync daemon. This should * free some anonymous transaction locks. (TXN_LOCK must be held.) */ static lid_t txLockAlloc(void) { lid_t lid; INCREMENT(TxStat.txLockAlloc); if (!TxAnchor.freelock) { INCREMENT(TxStat.txLockAlloc_freelock); } while (!(lid = TxAnchor.freelock)) TXN_SLEEP(&TxAnchor.freelockwait); TxAnchor.freelock = TxLock[lid].next; HIGHWATERMARK(stattx.maxlid, lid); if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; wake_up_process(jfsSyncThread); } return lid; } static void txLockFree(lid_t lid) { TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { jfs_info("txLockFree jfs_tlocks_low no more"); jfs_tlocks_low = 0; TXN_WAKEUP(&TxAnchor.lowlockwait); } TXN_WAKEUP(&TxAnchor.freelockwait); } /* * NAME: txInit() * * FUNCTION: initialize transaction management structures * * RETURN: * * serialization: single thread at jfs_init() */ int txInit(void) { int k, size; struct sysinfo si; /* Set defaults for nTxLock and nTxBlock if unset */ if (nTxLock == -1) { if (nTxBlock == -1) { /* Base default on memory size */ si_meminfo(&si); if (si.totalram > (256 * 1024)) /* 1 GB */ nTxLock = 64 * 1024; else nTxLock = si.totalram >> 2; } else if (nTxBlock > (8 * 1024)) nTxLock = 64 * 1024; else nTxLock = nTxBlock << 3; } if (nTxBlock 
== -1) nTxBlock = nTxLock >> 3; /* Verify tunable parameters */ if (nTxBlock < 16) nTxBlock = 16; /* No one should set it this low */ if (nTxBlock > 65536) nTxBlock = 65536; if (nTxLock < 256) nTxLock = 256; /* No one should set it this low */ if (nTxLock > 65536) nTxLock = 65536; printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", nTxBlock, nTxLock); /* * initialize transaction block (tblock) table * * transaction id (tid) = tblock index * tid = 0 is reserved. */ TxLockLWM = (nTxLock * 4) / 10; TxLockHWM = (nTxLock * 7) / 10; TxLockVHWM = (nTxLock * 8) / 10; size = sizeof(struct tblock) * nTxBlock; TxBlock = vmalloc(size); if (TxBlock == NULL) return -ENOMEM; for (k = 1; k < nTxBlock - 1; k++) { TxBlock[k].next = k + 1; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); } TxBlock[k].next = 0; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); TxAnchor.freetid = 1; init_waitqueue_head(&TxAnchor.freewait); stattx.maxtid = 1; /* statistics */ /* * initialize transaction lock (tlock) table * * transaction lock id = tlock index * tlock id = 0 is reserved. */ size = sizeof(struct tlock) * nTxLock; TxLock = vmalloc(size); if (TxLock == NULL) { vfree(TxBlock); return -ENOMEM; } /* initialize tlock table */ for (k = 1; k < nTxLock - 1; k++) TxLock[k].next = k + 1; TxLock[k].next = 0; init_waitqueue_head(&TxAnchor.freelockwait); init_waitqueue_head(&TxAnchor.lowlockwait); TxAnchor.freelock = 1; TxAnchor.tlocksInUse = 0; INIT_LIST_HEAD(&TxAnchor.anon_list); INIT_LIST_HEAD(&TxAnchor.anon_list2); LAZY_LOCK_INIT(); INIT_LIST_HEAD(&TxAnchor.unlock_queue); stattx.maxlid = 1; /* statistics */ return 0; } /* * NAME: txExit() * * FUNCTION: clean up when module is unloaded */ void txExit(void) { vfree(TxLock); TxLock = NULL; vfree(TxBlock); TxBlock = NULL; } /* * NAME: txBegin() * * FUNCTION: start a transaction. 
* * PARAMETER: sb - superblock * flag - force for nested tx; * * RETURN: tid - transaction id * * note: flag force allows to start tx for nested tx * to prevent deadlock on logsync barrier; */ tid_t txBegin(struct super_block *sb, int flag) { tid_t t; struct tblock *tblk; struct jfs_log *log; jfs_info("txBegin: flag = 0x%x", flag); log = JFS_SBI(sb)->log; if (!log) { jfs_error(sb, "read-only filesystem\n"); return 0; } TXN_LOCK(); INCREMENT(TxStat.txBegin); retry: if (!(flag & COMMIT_FORCE)) { /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag) || test_bit(log_QUIESCE, &log->flag)) { INCREMENT(TxStat.txBegin_barrier); TXN_SLEEP(&log->syncwait); goto retry; } } if (flag == 0) { /* * Don't begin transaction if we're getting starved for tlocks * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately * free tlocks) */ if (TxAnchor.tlocksInUse > TxLockVHWM) { INCREMENT(TxStat.txBegin_lockslow); TXN_SLEEP(&TxAnchor.lowlockwait); goto retry; } } /* * allocate transaction id/block */ if ((t = TxAnchor.freetid) == 0) { jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } tblk = tid_to_tblock(t); if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { /* Don't let a non-forced transaction take the last tblk */ jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } TxAnchor.freetid = tblk->next; /* * initialize transaction */ /* * We can't zero the whole thing or we screw up another thread being * awakened after sleeping on tblk->waitor * * memset(tblk, 0, sizeof(struct tblock)); */ tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; tblk->sb = sb; ++log->logtid; tblk->logtid = log->logtid; ++log->active; HIGHWATERMARK(stattx.maxtid, t); /* statistics */ INCREMENT(stattx.ntid); /* statistics */ TXN_UNLOCK(); jfs_info("txBegin: returning tid = %d", t); return t; } /* * NAME: txBeginAnon() * * 
FUNCTION:	start an anonymous transaction.
 *		Blocks if logsync or available tlocks are low to prevent
 *		anonymous tlocks from depleting supply.
 *		If the filesystem has no log attached, the condition is
 *		reported through jfs_error() (same message as txBegin())
 *		and the function returns without waiting.
 *
 * PARAMETER:	sb	- superblock
 *
 * RETURN:	none
 */
void txBeginAnon(struct super_block *sb)
{
	struct jfs_log *log;

	log = JFS_SBI(sb)->log;

	/*
	 * Mirror the guard in txBegin(): without it the test_bit() calls
	 * below would dereference a NULL log.
	 */
	if (!log) {
		jfs_error(sb, "read-only filesystem\n");
		return;
	}

	TXN_LOCK();
	INCREMENT(TxStat.txBeginAnon);

      retry:
	/*
	 * synchronize with logsync barrier
	 */
	if (test_bit(log_SYNCBARRIER, &log->flag) ||
	    test_bit(log_QUIESCE, &log->flag)) {
		INCREMENT(TxStat.txBeginAnon_barrier);
		TXN_SLEEP(&log->syncwait);
		goto retry;
	}

	/*
	 * Don't begin transaction if we're getting starved for tlocks
	 */
	if (TxAnchor.tlocksInUse > TxLockVHWM) {
		INCREMENT(TxStat.txBeginAnon_lockslow);
		TXN_SLEEP(&TxAnchor.lowlockwait);
		goto retry;
	}
	TXN_UNLOCK();
}

/*
 *	txEnd()
 *
 * function: free specified transaction block.
 *
 *	logsync barrier processing:
 *	when the last active transaction of the log ends while the
 *	logsync barrier is set, a sync point is taken, the barrier is
 *	cleared and the tasks sleeping on log->syncwait are woken.
 *
 * serialization: takes TXN_LOCK internally; must be called without it.
 */
void txEnd(tid_t tid)
{
	struct tblock *tblk = tid_to_tblock(tid);
	struct jfs_log *log;

	jfs_info("txEnd: tid = %d", tid);
	TXN_LOCK();

	/*
	 * wakeup transactions waiting on the page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	log = JFS_SBI(tblk->sb)->log;

	/*
	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
	 * otherwise, we would be left with a transaction that may have been
	 * reused.
	 *
	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
	 * routine.
	 */
	if (tblk->flag & tblkGC_LAZY) {
		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
		TXN_UNLOCK();

		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
		tblk->flag |= tblkGC_UNLOCKED;
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
		return;
	}

	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);

	/* all tlocks must already have been released */
	assert(tblk->next == 0);

	/*
	 * insert tblock back on freelist
	 */
	tblk->next = TxAnchor.freetid;
	TxAnchor.freetid = tid;

	/*
	 * mark the tblock not active
	 */
	if (--log->active == 0) {
		clear_bit(log_FLUSH, &log->flag);

		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag)) {
			TXN_UNLOCK();

			/* write dirty metadata & forward log syncpt */
			jfs_syncpt(log, 1);

			jfs_info("log barrier off: 0x%x", log->lsn);

			/* enable new transactions start */
			clear_bit(log_SYNCBARRIER, &log->flag);

			/* wakeup all waitors for logsync barrier */
			TXN_WAKEUP(&log->syncwait);

			/* TXN_LOCK was already dropped above */
			goto wakeup;
		}
	}

	TXN_UNLOCK();
wakeup:
	/*
	 * wakeup all waitors for a free tblock
	 */
	TXN_WAKEUP(&TxAnchor.freewait);
}

/*
 *	txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *	tid	- requesting transaction (0 = anonymous transaction)
 *	ip	- inode the page belongs to
 *	mp	- metapage (or in-inode pseudo page) to lock
 *	type	- tlck* type bits, OR-ed into the tlock's type on success
 *
 * return: pointer to the granted tlock, or NULL if the page was held by
 *	another transaction; in that case @mp has been released and the
 *	caller may have slept, so it must re-read the page and retry.
 *
 * serialization: takes and releases TXN_LOCK internally.
 */
struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
		     int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int dir_xtree = 0;
	lid_t lid;
	tid_t xtid;
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct linelock *linelock;
	xtpage_t *p;
	struct tblock *tblk;

	TXN_LOCK();

	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
	    !(mp->xflag & COMMIT_PAGE)) {
		/*
		 * Directory inode is special.  It can have both an xtree tlock
		 * and a dtree tlock associated with it.
		 */
		dir_xtree = 1;
		lid = jfs_ip->xtlid;
	} else
		lid = mp->lid;

	/* is page not locked by a transaction ? */
	if (lid == 0)
		goto allocateLock;

	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);

	/* is page locked by the requester transaction ? */
	tlck = lid_to_tlock(lid);
	if ((xtid = tlck->tid) == tid) {
		TXN_UNLOCK();
		goto grantLock;
	}

	/*
	 * is page locked by anonymous transaction/lock ?
	 *
	 * (page update without transaction (i.e., file write) is
	 * locked under anonymous transaction tid = 0:
	 * anonymous tlocks maintained on anonymous tlock list of
	 * the inode of the page and available to all anonymous
	 * transactions until txCommit() time at which point
	 * they are transferred to the transaction tlock list of
	 * the committing transaction of the inode)
	 */
	if (xtid == 0) {
		tlck->tid = tid;
		TXN_UNLOCK();
		tblk = tid_to_tblock(tid);
		/*
		 * The order of the tlocks in the transaction is important
		 * (during truncate, child xtree pages must be freed before
		 * parent's tlocks change the working map).
		 * Take tlock off anonymous list and add to tail of
		 * transaction list
		 *
		 * Note:  We really need to get rid of the tid & lid and
		 * use list_head's.  This code is getting UGLY!
		 */
		if (jfs_ip->atlhead == lid) {
			if (jfs_ip->atltail == lid) {
				/* only anonymous txn.
				 * Remove from anon_list
				 */
				TXN_LOCK();
				list_del_init(&jfs_ip->anon_inode_list);
				TXN_UNLOCK();
			}
			jfs_ip->atlhead = tlck->next;
		} else {
			lid_t last;

			/* find the predecessor of lid on the anonymous list */
			for (last = jfs_ip->atlhead;
			     lid_to_tlock(last)->next != lid;
			     last = lid_to_tlock(last)->next) {
				assert(last);
			}
			lid_to_tlock(last)->next = tlck->next;
			if (jfs_ip->atltail == lid)
				jfs_ip->atltail = last;
		}

		/* insert the tlock at tail of transaction tlock list */

		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;

		goto grantLock;
	}

	goto waitLock;

	/*
	 * allocate a tlock
	 */
      allocateLock:
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	TXN_UNLOCK();

	/* mark tlock for meta-data page */
	if (mp->xflag & COMMIT_PAGE) {

		tlck->flag = tlckPAGELOCK;

		/* mark the page dirty and nohomeok */
		metapage_nohomeok(mp);

		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
			 mp, mp->nohomeok, tid, tlck);

		/* if anonymous transaction, and buffer is on the group
		 * commit synclist, mark inode to show this.  This will
		 * prevent the buffer from being marked nohomeok for too
		 * long a time.
		 */
		if ((tid == 0) && mp->lsn)
			set_cflag(COMMIT_Synclist, ip);
	}
	/* mark tlock for in-memory inode */
	else
		tlck->flag = tlckINODELOCK;

	if (S_ISDIR(ip->i_mode))
		tlck->flag |= tlckDIRECTORY;

	tlck->type = 0;

	/* bind the tlock and the page */
	tlck->ip = ip;
	tlck->mp = mp;
	if (dir_xtree)
		jfs_ip->xtlid = lid;
	else
		mp->lid = lid;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			TXN_LOCK();
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
			TXN_UNLOCK();
		}
	}

	/* initialize type dependent area for linelock */
	linelock = (struct linelock *) & tlck->lock;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKSHORT;
	linelock->index = 0;

	switch (type & tlckTYPE) {
	case tlckDTREE:
		linelock->l2linesize = L2DTSLOTSIZE;
		break;

	case tlckXTREE:
		linelock->l2linesize = L2XTSLOTSIZE;

		xtlck = (struct xtlock *) linelock;
		xtlck->header.offset = 0;
		xtlck->header.length = 2;

		if (type & tlckNEW) {
			xtlck->lwm.offset = XTENTRYSTART;
		} else {
			/* existing page: start the low water mark at the
			 * page's current nextindex
			 */
			if (mp->xflag & COMMIT_PAGE)
				p = (xtpage_t *) mp->data;
			else
				p = (xtpage_t *) &jfs_ip->i_xtroot;
			xtlck->lwm.offset =
			    le16_to_cpu(p->header.nextindex);
		}
		xtlck->lwm.length = 0;	/* ! */
		xtlck->twm.offset = 0;
		xtlck->hwm.offset = 0;

		xtlck->index = 2;
		break;

	case tlckINODE:
		linelock->l2linesize = L2INODESLOTSIZE;
		break;

	case tlckDATA:
		linelock->l2linesize = L2DATASLOTSIZE;
		break;

	default:
		jfs_err("UFO tlock:0x%p", tlck);
	}

	/*
	 * update tlock vector
	 */
      grantLock:
	tlck->type |= type;

	return tlck;

	/*
	 * page is being locked by another transaction:
	 */
      waitLock:
	/* Only locks on ipimap or ipaimap should reach here */
	/* assert(jfs_ip->fileset == AGGREGATE_I); */
	if (jfs_ip->fileset != AGGREGATE_I) {
		/* terminate the message so it is not merged with later output */
		printk(KERN_ERR "txLock: trying to lock locked page!\n");
		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       ip, sizeof(*ip), 0);
		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       mp, sizeof(*mp), 0);
		print_hex_dump(KERN_ERR, "Locker's tblock: ",
			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
			       sizeof(struct tblock), 0);
		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       tlck, sizeof(*tlck), 0);
		BUG();
	}
	INCREMENT(stattx.waitlock);	/* statistics */
	TXN_UNLOCK();
	release_metapage(mp);
	TXN_LOCK();
	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */

	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
		 tid, xtid, lid);

	/* Recheck everything since dropping TXN_LOCK */
	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	else
		TXN_UNLOCK();
	jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);

	return NULL;
}

/*
 * NAME:	txRelease()
 *
 * FUNCTION:	Release buffers associated with transaction locks, but don't
 *		mark homeok yet.  This allows other transactions to modify
 *		buffers, but won't let them go to disk until commit record
 *		actually gets written.
 *
 * PARAMETER:
 *		tblk	- transaction block whose tlock list is walked
 *
 * RETURN:	none
 */
static void txRelease(struct tblock * tblk)
{
	struct metapage *mp;
	lid_t lid;
	struct tlock *tlck;

	TXN_LOCK();

	/*
	 * Detach every page-backed tlock (i.e. one with a metapage that is
	 * not a btree root) from its page by clearing the page's lid.
	 */
	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);
			mp->lid = 0;
		}
	}

	/*
	 * wakeup transactions waiting on a page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	TXN_UNLOCK();
}

/*
 * NAME:	txUnlock()
 *
 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 *		objects and frees their lockwords.
 *
 * For each tlock of @tblk: mark the bound metapage homeok, let the page
 * inherit the transaction's commit lsn where applicable, then return the
 * tlock and all of its chained linelocks to the free list.  Finally the
 * tblock is removed from the log's sync list if it carries an lsn.
 */
static void txUnlock(struct tblock * tblk)
{
	struct tlock *tlck;
	struct linelock *linelock;
	lid_t lid, next, llid, k;
	struct metapage *mp;
	struct jfs_log *log;
	int difft, diffp;
	unsigned long flags;

	jfs_info("txUnlock: tblk = 0x%p", tblk);
	log = JFS_SBI(tblk->sb)->log;

	/*
	 * mark page under tlock homeok (its log has been written):
	 */
	for (lid = tblk->next; lid; lid = next) {
		tlck = lid_to_tlock(lid);
		/* fetch the successor now: the tlock is freed further down */
		next = tlck->next;

		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);

		/* unbind page from tlock */
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);

			/* hold buffer
			 */
			hold_metapage(mp);

			assert(mp->nohomeok > 0);
			_metapage_homeok(mp);

			/* inherit younger/larger clsn */
			LOGSYNC_LOCK(log, flags);
			if (mp->clsn) {
				logdiff(difft, tblk->clsn, log);
				logdiff(diffp, mp->clsn, log);
				if (difft > diffp)
					mp->clsn = tblk->clsn;
			} else
				mp->clsn = tblk->clsn;
			LOGSYNC_UNLOCK(log, flags);

			assert(!(tlck->flag & tlckFREEPAGE));

			put_metapage(mp);
		}

		/* insert tlock, and linelock(s) of the tlock if any,
		 * at head of freelist
		 */
		TXN_LOCK();

		llid = ((struct linelock *) & tlck->lock)->next;
		while (llid) {
			linelock = (struct linelock *) lid_to_tlock(llid);
			/* save the link before the entry goes back on the
			 * free list
			 */
			k = linelock->next;
			txLockFree(llid);
			llid = k;
		}
		txLockFree(lid);

		TXN_UNLOCK();
	}
	tblk->next = tblk->last = 0;

	/*
	 * remove tblock from logsynclist
	 * (allocation map pages inherited lsn of tblk and
	 * has been inserted in logsync list at txUpdateMap())
	 */
	if (tblk->lsn) {
		LOGSYNC_LOCK(log, flags);
		log->count--;
		list_del(&tblk->synclist);
		LOGSYNC_UNLOCK(log, flags);
	}
}

/*
 *	txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
 *	for freed page, maplock is used as xtlock/dtlock type;
 *
 * The new tlock is bound to @ip with no metapage (tlck->mp is NULL) and
 * is queued at the tail of transaction @tid's tlock list, or, for
 * tid == 0, at the head of the inode's anonymous tlock list.
 *
 * return: pointer to the newly allocated tlock
 */
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	lid_t lid;
	struct tblock *tblk;
	struct tlock *tlck;
	struct maplock *maplock;

	TXN_LOCK();

	/*
	 * allocate a tlock
	 */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	/* bind the tlock and the object */
	tlck->flag = tlckINODELOCK;
	if (S_ISDIR(ip->i_mode))
		tlck->flag |= tlckDIRECTORY;
	tlck->ip = ip;
	tlck->mp = NULL;

	tlck->type = type;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			/* TXN_LOCK is still held here */
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
		}
	}

	TXN_UNLOCK();

	/* initialize type dependent area for maplock */
	maplock = (struct maplock *) & tlck->lock;
	maplock->next = 0;
	maplock->maxcnt = 0;
	maplock->index = 0;

	return tlck;
}

/*
 *	txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 *
 * A whole tlock table entry is allocated and used as one long linelock
 * (maxcnt = TLOCKLONG), which is then linked in directly after @tlock.
 *
 * return: pointer to the new linelock
 */
struct linelock *txLinelock(struct linelock * tlock)
{
	lid_t lid;
	struct tlock *tlck;
	struct linelock *linelock;

	TXN_LOCK();

	/* allocate a TxLock structure */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	TXN_UNLOCK();

	/* initialize linelock */
	linelock = (struct linelock *) tlck;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKLONG;
	linelock->index = 0;
	/*
	 * NOTE(review): linelock and tlck point at the same table entry.  If
	 * the two structures keep `flag` at the same offset, this test reads
	 * the value stored just above rather than a pre-existing directory
	 * flag - confirm against the struct definitions.
	 */
	if (tlck->flag & tlckDIRECTORY)
		linelock->flag |= tlckDIRECTORY;

	/* append linelock after tlock */
	linelock->next = tlock->next;
	tlock->next = lid;

	return linelock;
}

/*
 *		transaction commit management
 *		-----------------------------
 */

/*
 * NAME:	txCommit()
 *
 * FUNCTION:	commit the changes to the objects specified in
 *		clist.  For journalled segments only the
 *		changes of the caller are committed, ie by tid.
 *		for non-journalled segments the data are flushed to
 *		disk and then the change to the disk inode and indirect
 *		blocks committed (so blocks newly allocated to the
 *		segment will be made a part of the segment atomically).
 *
 *		all of the segments specified in clist must be in
 *		one file system. no more than 6 segments are needed
 *		to handle all unix svcs.
 *
 *		if the i_nlink field (i.e. disk inode link count)
 *		is zero, and the type of inode is a regular file or
 *		directory, or symbolic link , the inode is truncated
 *		to zero length. the truncation is committed but the
 *		VM resources are unaffected until it is closed (see
 *		iput and iclose).
 *
 * PARAMETER:
 *		tid	- transaction to commit; if 0, a transaction is
 *			  started here with txBegin()
 *		nip	- number of entries in iplist
 *		iplist	- inodes to commit; the array is re-ordered in
 *			  place by descending inode number
 *		flag	- COMMIT_* flags, OR-ed into the tblock's xflag
 *
 * RETURN:	0 on success, -EROFS if iplist[0] is on a read-only file
 *		system, otherwise the error returned by diWrite() (the
 *		transaction is then aborted with txAbort()).
 *
 * serialization:
 *		on entry the inode lock on each segment is assumed
 *		to be held.
 *
 * i/o error:
 */
int txCommit(tid_t tid,		/* transaction identifier */
	     int nip,		/* number of inodes to commit */
	     struct inode **iplist,	/* list of inode to commit */
	     int flag)
{
	int rc = 0;
	struct commit cd;
	struct jfs_log *log;
	struct tblock *tblk;
	struct lrd *lrd;
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int k, n;
	ino_t top;
	struct super_block *sb;

	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
	/* is read-only file system ? */
	if (isReadOnly(iplist[0])) {
		rc = -EROFS;
		goto TheEnd;
	}

	sb = cd.sb = iplist[0]->i_sb;
	cd.tid = tid;

	if (tid == 0)
		tid = txBegin(sb, 0);
	tblk = tid_to_tblock(tid);

	/*
	 * initialize commit structure
	 */
	log = JFS_SBI(sb)->log;
	cd.log = log;

	/* initialize log record descriptor in commit */
	lrd = &cd.lrd;
	lrd->logtid = cpu_to_le32(tblk->logtid);
	lrd->backchain = 0;

	tblk->xflag |= flag;

	/* neither forced nor synchronous: commit lazily */
	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
		tblk->xflag |= COMMIT_LAZY;
	/*
	 *	prepare non-journaled objects for commit
	 *
	 * flush data pages of non-journaled file
	 * to prevent the file getting non-initialized disk blocks
	 * in case of crash.
	 * (new blocks - )
	 */
	cd.iplist = iplist;
	cd.nip = nip;

	/*
	 *	acquire transaction lock on (on-disk) inodes
	 *
	 * update on-disk inode from in-memory inode
	 * acquiring transaction locks for AFTER records
	 * on the on-disk inode of file object
	 *
	 * sort the inodes array by inode number in descending order
	 * to prevent deadlock when acquiring transaction lock
	 * of on-disk inodes on multiple on-disk inode pages by
	 * multiple concurrent transactions
	 */
	for (k = 0; k < cd.nip; k++) {
		/* selection step: move the largest remaining i_ino to slot k */
		top = (cd.iplist[k])->i_ino;
		for (n = k + 1; n < cd.nip; n++) {
			ip = cd.iplist[n];
			if (ip->i_ino > top) {
				top = ip->i_ino;
				cd.iplist[n] = cd.iplist[k];
				cd.iplist[k] = ip;
			}
		}

		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * BUGBUG - This code has temporarily been removed.  The
		 * intent is to ensure that any file data is written before
		 * the metadata is committed to the journal.  This prevents
		 * uninitialized data from appearing in a file after the
		 * journal has been replayed.  (The uninitialized data
		 * could be sensitive data removed by another user.)
		 *
		 * The problem now is that we are holding the IWRITELOCK
		 * on the inode, and calling filemap_fdatawrite on an
		 * unmapped page will cause a deadlock in jfs_get_block.
		 *
		 * The long term solution is to pare down the use of
		 * IWRITELOCK.  We are currently holding it too long.
		 * We could also be smarter about which data pages need
		 * to be written before the transaction is committed and
		 * when we don't need to worry about it at all.
		 *
		 * if ((!S_ISDIR(ip->i_mode))
		 *    && (tblk->flag & COMMIT_DELETE) == 0)
		 *	filemap_write_and_wait(ip->i_mapping);
		 */

		/*
		 * Mark inode as not dirty.  It will still be on the dirty
		 * inode list, but we'll know not to commit it again unless
		 * it gets marked dirty again
		 */
		clear_cflag(COMMIT_Dirty, ip);

		/* inherit anonymous tlock(s) of inode */
		if (jfs_ip->atlhead) {
			/* splice the anonymous list in front of the
			 * transaction's own tlock list
			 */
			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
			tblk->next = jfs_ip->atlhead;
			if (!tblk->last)
				tblk->last = jfs_ip->atltail;
			jfs_ip->atlhead = jfs_ip->atltail = 0;
			TXN_LOCK();
			list_del_init(&jfs_ip->anon_inode_list);
			TXN_UNLOCK();
		}

		/*
		 * acquire transaction lock on on-disk inode page
		 * (become first tlock of the tblk's tlock list)
		 */
		if (((rc = diWrite(tid, ip))))
			goto out;
	}

	/*
	 *	write log records from transaction locks
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	txLog(log, tblk, &cd);

	/*
	 * Ensure that inode isn't reused before
	 * lazy commit thread finishes processing
	 */
	if (tblk->xflag & COMMIT_DELETE) {
		ihold(tblk->u.ip);
		/*
		 * Avoid a rare deadlock
		 *
		 * If the inode is locked, we may be blocked in
		 * jfs_commit_inode.  If so, we don't want the
		 * lazy_commit thread doing the last iput() on the inode
		 * since that may block on the locked inode.  Instead,
		 * commit the transaction synchronously, so the last iput
		 * will be done by the calling thread (or later)
		 */
		/*
		 * I believe this code is no longer needed.  Splitting I_LOCK
		 * into two bits, I_NEW and I_SYNC should prevent this
		 * deadlock as well.  But since I don't have a JFS testload
		 * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
		 * Joern
		 */
		if (tblk->u.ip->i_state & I_SYNC)
			tblk->xflag &= ~COMMIT_LAZY;
	}

	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
	       ((tblk->u.ip->i_nlink == 0) &&
		!test_cflag(COMMIT_Nolink, tblk->u.ip)));

	/*
	 *	write COMMIT log record
	 */
	lrd->type = cpu_to_le16(LOG_COMMIT);
	lrd->length = 0;
	lmLog(log, tblk, lrd, NULL);

	lmGroupCommit(log, tblk);

	/*
	 *	- transaction is now committed -
	 */

	/*
	 * force pages in careful update
	 * (imap addressing structure update)
	 */
	if (flag & COMMIT_FORCE)
		txForce(tblk);

	/*
	 *	update allocation map.
	 *
	 * update inode allocation map and inode:
	 * free pager lock on memory object of inode if any.
	 * update block allocation map.
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if (tblk->xflag & COMMIT_FORCE)
		txUpdateMap(tblk);

	/*
	 *	free transaction locks and pageout/free pages
	 */
	txRelease(tblk);

	/* tblocks flagged tblkGC_LAZY are not unlocked here */
	if ((tblk->flag & tblkGC_LAZY) == 0)
		txUnlock(tblk);


	/*
	 *	reset in-memory object state
	 */
	for (k = 0; k < cd.nip; k++) {
		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * reset in-memory inode state
		 */
		jfs_ip->bxflag = 0;
		jfs_ip->blid = 0;
	}

      out:
	if (rc != 0)
		txAbort(tid, 1);

      TheEnd:
	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
	return rc;
}

/*
 * NAME:	txLog()
 *
 * FUNCTION:	Writes AFTER log records for all lines modified
 *		by tid for segments specified by inodes in comdata.
 *		Code assumes only WRITELOCKS are recorded in lockwords.
* * PARAMETERS: * * RETURN : */ static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd) { struct inode *ip; lid_t lid; struct tlock *tlck; struct lrd *lrd = &cd->lrd; /* * write log record(s) for each tlock of transaction, */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); tlck->flag |= tlckLOG; /* initialize lrd common */ ip = tlck->ip; lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); /* write log record of page from the tlock */ switch (tlck->type & tlckTYPE) { case tlckXTREE: xtLog(log, tblk, lrd, tlck); break; case tlckDTREE: dtLog(log, tblk, lrd, tlck); break; case tlckINODE: diLog(log, tblk, lrd, tlck, cd); break; case tlckMAP: mapLog(log, tblk, lrd, tlck); break; case tlckDATA: dataLog(log, tblk, lrd, tlck); break; default: jfs_err("UFO tlock:0x%p", tlck); } } return; } /* * diLog() * * function: log inode tlock and format maplock to update bmap; */ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck, struct commit *cd) { struct metapage *mp; pxd_t *pxd; struct pxd_lock *pxdlock; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_INODE); lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); pxd = &lrd->log.redopage.pxd; /* * inode after image */ if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else if (tlck->type & tlckFREE) { /* * free inode extent * * (pages of the freed inode extent have been invalidated and * a maplock for free of the extent has been formatted at * txLock() time); * * the tlock had been acquired on the 
inode allocation map page * (iag) that specifies the freed extent, even though the map * page is not itself logged, to prevent pageout of the map * page before the log; */ /* log LOG_NOREDOINOEXT of the freed inode extent for * logredo() to start NoRedoPage filters, and to update * imap and bmap for free of the extent; */ lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); /* * For the LOG_NOREDOINOEXT record, we need * to pass the IAG number and inode extent * index (within that IAG) from which the * extent is being released. These have been * passed to us in the iplist[1] and iplist[2]. */ lrd->log.noredoinoext.iagnum = cpu_to_le32((u32) (size_t) cd->iplist[1]); lrd->log.noredoinoext.inoext_idx = cpu_to_le32((u32) (size_t) cd->iplist[2]); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* update bmap */ tlck->flag |= tlckUPDATEMAP; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else jfs_err("diLog: UFO type tlck:0x%p", tlck); return; } /* * dataLog() * * function: log data tlock */ static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck) { struct metapage *mp; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DATA); lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); pxd = &lrd->log.redopage.pxd; /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (jfs_dirtable_inline(tlck->ip)) { /* * The table has been truncated, we've must have deleted * the last entry, so don't bother logging this */ mp->lid = 0; grab_metapage(mp); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return; } PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * dtLog() * * function: log dtree 
tlock and format maplock to update bmap; */ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct metapage *mp; struct pxd_lock *pxdlock; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); /* * page extension via relocation: entry insertion; * page extension in-place: entry insertion; * new right page from page split, reinitialized in-line * root from root page split: entry insertion; */ if (tlck->type & (tlckNEW | tlckEXTEND)) { /* log after-image of the new page for logredo(): * mark log (LOG_NEW) for logredo() to initialize * freelist and update bmap for alloc of the new page; */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (tlck->type & tlckEXTEND) lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); else lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP for * alloc of the new page; */ /* a root page (tlckBTROOT) gets neither a maplock nor tlckWRITEPAGE */ if (tlck->type & tlckBTROOT) return; tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) & tlck->lock; pxdlock->flag = mlckALLOCPXD; pxdlock->pxd = *pxd; pxdlock->index = 1; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * entry insertion/deletion, * sibling page link update (old right page before split); */ if (tlck->type & (tlckENTRY | tlckRELINK)) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * 
page deletion: page has been invalidated * page relocation: source extent * * a maplock for free of the page has been formatted * at txLock() time); */ if (tlck->type & (tlckFREE | tlckRELOCATE)) { /* log LOG_NOREDOPAGE of the deleted page for logredo() * to start NoRedoPage filter and to update bmap for free * of the deleted page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; } return; } /* * xtLog() * * function: log xtree tlock and format maplock to update bmap; */ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct inode *ip; struct metapage *mp; xtpage_t *p; struct xtlock *xtlck; struct maplock *maplock; struct xdlistlock *xadlock; struct pxd_lock *pxdlock; pxd_t *page_pxd; int next, lwm, hwm; ip = tlck->ip; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); page_pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; if (S_ISDIR(ip->i_mode)) lrd->log.redopage.type |= cpu_to_le16(LOG_DIR_XTREE); } else p = (xtpage_t *) mp->data; next = le16_to_cpu(p->header.nextindex); /* xtlck, maplock and xadlock are three views of the same tlck->lock storage */ xtlck = (struct xtlock *) & tlck->lock; maplock = (struct maplock *) & tlck->lock; xadlock = (struct xdlistlock *) maplock; /* * entry insertion/extension; * sibling page link update (old right page before split); */ if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { /* log after-image for logredo(): * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when 
* applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. */ lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; if (lwm == next) goto out; if (lwm > next) { jfs_err("xtLog: lwm > next"); goto out; } tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as many pxd's as xads in the lock */ xadlock->flag = mlckALLOCPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); p->xad[lwm + i].flag &= ~(XAD_NEW | XAD_EXTENDED); pxd++; } } else { /* * xdlist will point into inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckALLOCXADLIST; xadlock->xdlist = &p->xad[lwm]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); maplock->index = 1; out: /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * page deletion: file deletion/truncation (ref. 
xtTruncate()) * * (page will be invalidated after log is written and bmap * is updated from the page); */ if (tlck->type & tlckFREE) { /* LOG_NOREDOPAGE log for NoRedoPage filter: * if page free from file delete, NoRedoFile filter from * inode image of zero link count will subsume NoRedoPage * filters for each page; * if page free from file truncation, write NoRedoPage * filter; * * update of block allocation map for the page itself: * if page free from deletion and truncation, LOG_UPDATEMAP * log for the page itself is generated from processing * its parent page xad entries; */ /* if page free from file truncation, log LOG_NOREDOPAGE * of the deleted page for logredo() to start NoRedoPage * filter for the page; */ if (tblk->xflag & COMMIT_TRUNCATE) { /* write NOREDOPAGE for the page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb-> s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); if (tlck->type & tlckBTROOT) { /* Empty xtree must be logged */ lrd->type = cpu_to_le16(LOG_REDOPAGE); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } } /* init LOG_UPDATEMAP of the freed extents * XAD[XTENTRYSTART:hwm) from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - XTENTRYSTART + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = XTENTRYSTART; xtlck->header.length = hwm - XTENTRYSTART + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[XTENTRYSTART:hwm) from the * deleted page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->count = hwm - XTENTRYSTART + 1; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; 
/* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as many pxd's as xads in the lock */ xadlock->flag = mlckFREEPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[XTENTRYSTART + i])); PXDlength(pxd, lengthXAD(&p->xad[XTENTRYSTART + i])); pxd++; } } else { /* * xdlist will point into inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckFREEXADLIST; xadlock->xdlist = &p->xad[XTENTRYSTART]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", tlck->ip, mp, xadlock->count); maplock->index = 1; /* mark page as invalid */ if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) && !(tlck->type & tlckBTROOT)) tlck->flag |= tlckFREEPAGE; /* else (tblk->xflag & COMMIT_PMAP) ? release the page; */ return; } /* * page/entry truncation: file truncation (ref. xtTruncate()) * * |----------+------+------+---------------| * | | | * | | hwm - hwm before truncation * | next - truncation point * lwm - lwm before truncation * header ? */ if (tlck->type & tlckTRUNCATE) { pxd_t pxd; /* truncated extent of xad */ int twm; /* * For truncation the entire linelock may be used, so it would * be difficult to store xad list in linelock itself. * Therefore, we'll just force transaction to be committed * synchronously, so that xtree pages won't be changed before * txUpdateMap runs. */ tblk->xflag &= ~COMMIT_LAZY; lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; hwm = xtlck->hwm.offset; twm = xtlck->twm.offset; /* * write log records */ /* log after-image for logredo(): * * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when * applying the after-image to the meta-data page. 
*/ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* init LOG_UPDATEMAP for logredo() to update bmap for * free of truncated delta extent of the truncated * entry XAD[next - 1]: * (xtlck->pxdlock = truncated delta extent); */ pxdlock = (struct pxd_lock *) & xtlck->pxdlock; /* assert(pxdlock->type & tlckTRUNCATE); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; pxd = pxdlock->pxd; /* save to format maplock */ lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* init LOG_UPDATEMAP of the freed extents * XAD[next:hwm] from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - next + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = next; xtlck->header.length = hwm - next + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } /* * format maplock(s) for txUpdateMap() to update bmap */ maplock->index = 0; /* * allocate entries XAD[lwm:next): */ if (lwm < next) { /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
*/ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; xadlock->xdlist = &p->xad[lwm]; jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d", tlck->ip, mp, xadlock->count, lwm, next); maplock->index++; xadlock++; } /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* format a maplock for txUpdateMap() to update bmap * to free truncated delta extent of the truncated * entry XAD[next - 1]; * (xtlck->pxdlock = truncated delta extent); */ tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) xadlock; pxdlock->flag = mlckFREEPXD; pxdlock->count = 1; pxdlock->pxd = pxd; jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d", ip, mp, pxdlock->count, hwm); maplock->index++; xadlock++; } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[next:hwm] from the deleted * page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckFREEXADLIST; xadlock->count = hwm - next + 1; xadlock->xdlist = &p->xad[next]; jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d", tlck->ip, mp, xadlock->count, next, hwm); maplock->index++; } /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } return; } /* * mapLog() * * function: log from maplock of freed data extents; */ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct pxd_lock *pxdlock; int i, nlock; pxd_t *pxd; /* * page relocation: free the source page extent * * a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time saving the src * relocated page address; */ if (tlck->type & tlckRELOCATE) { /* log LOG_NOREDOPAGE of the old relocated page * for logredo() to start NoRedoPage filter; */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; pxd = &lrd->log.redopage.pxd; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, 
NULL)); /* (N.B. currently, logredo() does NOT update bmap * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); * if page free from relocation, LOG_UPDATEMAP log is * specifically generated now for logredo() * to update bmap for free of src relocated page; * (new flag LOG_RELOCATE may be introduced which will * inform logredo() to start NORedoPage filter and also * update block allocation map at the same time, thus * avoiding an extra log write); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; return; } /* * Otherwise it's not a relocate request * */ else { /* log LOG_UPDATEMAP for logredo() to update bmap for * free of truncated/relocated delta extent of the data; * e.g.: external EA extent, relocated/truncated extent * from xtTailgate(); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); pxdlock = (struct pxd_lock *) & tlck->lock; nlock = pxdlock->index; /* one LOG_UPDATEMAP record per pxd_lock; nlock is read from the first one */ for (i = 0; i < nlock; i++, pxdlock++) { if (pxdlock->flag & mlckALLOCPXD) lrd->log.updatemap.type = cpu_to_le16(LOG_ALLOCPXD); else lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", (ulong) addressPXD(&pxdlock->pxd), lengthPXD(&pxdlock->pxd)); } /* update bmap */ tlck->flag |= tlckUPDATEMAP; } } /* * txEA() * * function: acquire maplock for EA/ACL extents or * set COMMIT_Inlineea flag; * * parameter: * newea - may be NULL (checked before use) * oldea - dereferenced whenever COMMIT_Nolink is not set on ip */ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) { struct tlock *tlck = NULL; struct pxd_lock *maplock = NULL, *pxdlock = NULL; /* * format maplock for alloc of new EA extent */ if (newea) { /* Since the newea could be a 
completely zeroed entry we need to * check for the two flags which indicate we should actually * commit new EA data */ if (newea->flag & DXD_EXTENT) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; pxdlock->flag = mlckALLOCPXD; PXDaddress(&pxdlock->pxd, addressDXD(newea)); PXDlength(&pxdlock->pxd, lengthDXD(newea)); pxdlock++; maplock->index = 1; } else if (newea->flag & DXD_INLINE) { tlck = NULL; set_cflag(COMMIT_Inlineea, ip); } } /* * format maplock for free of old EA extent */ if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { /* no maplock from the alloc step above: acquire one and start at entry 0 */ if (tlck == NULL) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; maplock->index = 0; } pxdlock->flag = mlckFREEPXD; PXDaddress(&pxdlock->pxd, addressDXD(oldea)); PXDlength(&pxdlock->pxd, lengthDXD(oldea)); maplock->index++; } } /* * txForce() * * function: synchronously write pages locked by transaction * after txLog() but before txUpdateMap(); */ static void txForce(struct tblock * tblk) { struct tlock *tlck; lid_t lid, next; struct metapage *mp; /* * reverse the order of transaction tlocks in * careful update order of address index pages * (right to left, bottom up) */ /* NOTE(review): tblk->next is used without a zero check - presumably the tlock list is never empty here; confirm against callers */ tlck = lid_to_tlock(tblk->next); lid = tlck->next; tlck->next = 0; while (lid) { tlck = lid_to_tlock(lid); next = tlck->next; tlck->next = tblk->next; tblk->next = lid; lid = next; } /* * synchronously write the page, and * hold the page for txUpdateMap(); */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); if (tlck->flag & tlckWRITEPAGE) { tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ force_metapage(mp); #if 0 /* * The "right" thing to do here is to * synchronously write the metadata. 
* With the current implementation this * is hard since write_metapage requires * us to kunmap & remap the page. If we * have tlocks pointing into the metadata * pages, we don't want to do this. I think * we can get by with synchronously writing * the pages when they are released. */ assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); #endif } } } } /* * txUpdateMap() * * function: update persistent allocation map (and working map * if appropriate); * * parameter: * tblk - transaction block whose tlock list is scanned */ static void txUpdateMap(struct tblock * tblk) { struct inode *ip; struct inode *ipimap; lid_t lid; struct tlock *tlck; struct maplock *maplock; struct pxd_lock pxdlock; int maptype; int k, nlock; struct metapage *mp = NULL; ipimap = JFS_SBI(tblk->sb)->ipimap; /* map selection passed to txFreeMap() for tlocks without tlckDIRECTORY */ maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; /* * update block allocation map * * update allocation state in pmap (and wmap) and * update lsn of the pmap page; */ /* * scan each tlock/page of transaction for block allocation/free: * * for each tlock/page of transaction, update map. * ? are there tlock for pmap and pwmap at the same time ? */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); if ((tlck->flag & tlckUPDATEMAP) == 0) continue; if (tlck->flag & tlckFREEPAGE) { /* * Another thread may attempt to reuse freed space * immediately, so we want to get rid of the metapage * before anyone else has a chance to get it. * Lock metapage, update maps, then invalidate * the metapage. */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); grab_metapage(mp); } /* * extent list: * . in-line PXD list: * . 
out-of-line XAD list: */ maplock = (struct maplock *) & tlck->lock; nlock = maplock->index; for (k = 0; k < nlock; k++, maplock++) { /* * allocate blocks in persistent map: * * blocks have been allocated from wmap at alloc time; */ if (maplock->flag & mlckALLOC) { txAllocPMap(ipimap, maplock, tblk); } /* * free blocks in persistent and working map: * blocks will be freed in pmap and then in wmap; * * ? tblock specifies the PMAP/PWMAP based upon * transaction * * free blocks in persistent map: * blocks will be freed from wmap at last reference * release of the object for regular files; * * Always free blocks from both persistent & working * maps for directories */ else { /* (maplock->flag & mlckFREE) */ if (tlck->flag & tlckDIRECTORY) txFreeMap(ipimap, maplock, tblk, COMMIT_PWMAP); else txFreeMap(ipimap, maplock, tblk, maptype); } } if (tlck->flag & tlckFREEPAGE) { if (!(tblk->flag & tblkGC_LAZY)) { /* This is equivalent to txRelease */ ASSERT(mp->lid == lid); tlck->mp->lid = 0; } assert(mp->nohomeok == 1); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } } /* * update inode allocation map * * update allocation state in pmap and * update lsn of the pmap page; * update in-memory inode flag/state * * unlock mapper/write lock */ if (tblk->xflag & COMMIT_CREATE) { diUpdatePMap(ipimap, tblk->ino, false, tblk); /* update persistent block allocation map * for the allocation of inode extent; */ pxdlock.flag = mlckALLOCPXD; pxdlock.pxd = tblk->u.ixpxd; pxdlock.index = 1; txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); } else if (tblk->xflag & COMMIT_DELETE) { ip = tblk->u.ip; diUpdatePMap(ipimap, ip->i_ino, true, tblk); /* presumably pairs with the ihold() txCommit() takes for COMMIT_DELETE - confirm */ iput(ip); } } /* * txAllocPMap() * * function: allocate from persistent map; * * parameter: * ipbmap - * malock - * xad list: * pxd: * * maptype - * allocate from persistent map; * free from persistent map; * (e.g., tmp file - free from working map at releae * of last reference); * free from persistent and working map; * * lsn - log 
sequence number; */
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
			struct tblock * tblk)
{
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct xdlistlock *xadlistlock;
	xad_t *xad;
	s64 xaddr;
	int xlen;
	struct pxd_lock *pxdlock;
	struct xdlistlock *pxdlistlock;
	pxd_t *pxd;
	int n;

	/*
	 * allocate from persistent map;
	 *
	 * maplock->flag selects how the maplock is interpreted: a list of
	 * XADs, a single PXD, or a list of PXDs.  Every extent found is
	 * handed to dbUpdatePMap() with its second argument false
	 * (txFreeMap() passes true for the free case).
	 */
	if (maplock->flag & mlckALLOCXADLIST) {
		xadlistlock = (struct xdlistlock *) maplock;
		xad = xadlistlock->xdlist;
		for (n = 0; n < xadlistlock->count; n++, xad++) {
			/* only XADs flagged new or extended are processed */
			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
				xaddr = addressXAD(xad);
				xlen = lengthXAD(xad);
				dbUpdatePMap(ipbmap, false, xaddr,
					     (s64) xlen, tblk);
				/* clear the flags once the extent is handled */
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
					 (ulong) xaddr, xlen);
			}
		}
	} else if (maplock->flag & mlckALLOCPXD) {
		pxdlock = (struct pxd_lock *) maplock;
		xaddr = addressPXD(&pxdlock->pxd);
		xlen = lengthPXD(&pxdlock->pxd);
		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
	} else {	/* (maplock->flag & mlckALLOCPXDLIST) */

		pxdlistlock = (struct xdlistlock *) maplock;
		pxd = pxdlistlock->xdlist;
		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
			xaddr = addressPXD(pxd);
			xlen = lengthPXD(pxd);
			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
				     tblk);
			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
				 (ulong) xaddr, xlen);
		}
	}
}

/*
 *	txFreeMap()
 *
 * function:	free from persistent and/or working map;
 *
 * maptype selects the map(s): COMMIT_PMAP updates only the persistent map,
 * COMMIT_WMAP only the working map, COMMIT_PWMAP both (persistent first).
 *
 * todo: optimization
 */
void txFreeMap(struct inode *ip,
	       struct maplock * maplock, struct tblock * tblk, int maptype)
{
	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
	struct xdlistlock *xadlistlock;
	xad_t *xad;
	s64 xaddr;
	int xlen;
	struct pxd_lock *pxdlock;
	struct xdlistlock *pxdlistlock;
	pxd_t *pxd;
	int n;

	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
		 tblk, maplock, maptype);

	/*
	 * free from persistent map;
	 */
	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
		if (maplock->flag & mlckFREEXADLIST) {
			xadlistlock = (struct xdlistlock *) maplock;
			xad = xadlistlock->xdlist;
			for (n = 0; n < xadlistlock->count; n++, xad++) {
				/* XADs still flagged XAD_NEW are skipped here */
				if (!(xad->flag & XAD_NEW)) {
					xaddr = addressXAD(xad);
					xlen = lengthXAD(xad);
					dbUpdatePMap(ipbmap, true, xaddr,
						     (s64) xlen, tblk);
					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
						 (ulong) xaddr, xlen);
				}
			}
		} else if (maplock->flag & mlckFREEPXD) {
			pxdlock = (struct pxd_lock *) maplock;
			xaddr = addressPXD(&pxdlock->pxd);
			xlen = lengthPXD(&pxdlock->pxd);
			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
				     tblk);
			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
				 (ulong) xaddr, xlen);
		} else {	/* (maplock->flag & mlckFREEPXDLIST) */

			pxdlistlock = (struct xdlistlock *) maplock;
			pxd = pxdlistlock->xdlist;
			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
				xaddr = addressPXD(pxd);
				xlen = lengthPXD(pxd);
				dbUpdatePMap(ipbmap, true, xaddr,
					     (s64) xlen, tblk);
				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
					 (ulong) xaddr, xlen);
			}
		}
	}

	/*
	 * free from working map;
	 */
	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
		if (maplock->flag & mlckFREEXADLIST) {
			xadlistlock = (struct xdlistlock *) maplock;
			xad = xadlistlock->xdlist;
			for (n = 0; n < xadlistlock->count; n++, xad++) {
				xaddr = addressXAD(xad);
				xlen = lengthXAD(xad);
				dbFree(ip, xaddr, (s64) xlen);
				/* unlike the persistent pass, all flags are reset */
				xad->flag = 0;
				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
					 (ulong) xaddr, xlen);
			}
		} else if (maplock->flag & mlckFREEPXD) {
			pxdlock = (struct pxd_lock *) maplock;
			xaddr = addressPXD(&pxdlock->pxd);
			xlen = lengthPXD(&pxdlock->pxd);
			dbFree(ip, xaddr, (s64) xlen);
			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
				 (ulong) xaddr, xlen);
		} else {	/* (maplock->flag & mlckFREEPXDLIST) */

			pxdlistlock = (struct xdlistlock *) maplock;
			pxd = pxdlistlock->xdlist;
			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
				xaddr = addressPXD(pxd);
				xlen = lengthPXD(pxd);
				dbFree(ip, xaddr, (s64) xlen);
				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
					 (ulong) xaddr, xlen);
			}
		}
	}
}

/*
 *	txFreelock()
 *
 * function:	remove tlock from inode anonymous locklist
 *
 * Every tlock on the inode's anonymous list that carries tlckFREELOCK is
 * unlinked and released with txLockFree(); atltail is then recomputed.
 * The whole walk runs under TXN_LOCK().
 */
void txFreelock(struct inode *ip)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	struct tlock *xtlck, *tlck;
	lid_t xlid = 0, lid;

	/* empty anonymous list: nothing to do */
	if (!jfs_ip->atlhead)
		return;

	TXN_LOCK();
	/*
	 * Start with the list head itself as the "previous" element.
	 * NOTE(review): presumably relies on 'next' being the first member
	 * of struct tlock so that xtlck->next aliases atlhead - confirm.
	 */
	xtlck = (struct tlock *) &jfs_ip->atlhead;

	while ((lid = xtlck->next) != 0) {
		tlck = lid_to_tlock(lid);
		if (tlck->flag & tlckFREELOCK) {
			/* unlink and free; xtlck stays as the predecessor */
			xtlck->next = tlck->next;
			txLockFree(lid);
		} else {
			/* keep this tlock; remember it as last survivor */
			xtlck = tlck;
			xlid = lid;
		}
	}

	if (jfs_ip->atlhead)
		jfs_ip->atltail = xlid;
	else {
		jfs_ip->atltail = 0;
		/*
		 * If inode was on anon_list, remove it
		 */
		list_del_init(&jfs_ip->anon_inode_list);
	}
	TXN_UNLOCK();
}

/*
 *	txAbort()
 *
 * function: abort tx before commit;
 *
 * frees line-locks and segment locks for all
 * segments in comdata structure.
 * Optionally sets state of file-system to FM_DIRTY in super-block.
 * log age of page-frames in memory for which caller has
 * are reset to 0 (to avoid logwrap).
 */
void txAbort(tid_t tid, int dirty)
{
	lid_t lid, next;
	struct metapage *mp;
	struct tblock *tblk = tid_to_tblock(tid);
	struct tlock *tlck;

	/*
	 * free tlocks of the transaction
	 */
	for (lid = tblk->next; lid; lid = next) {
		tlck = lid_to_tlock(lid);
		/* fetch the successor before the tlock is freed below */
		next = tlck->next;
		mp = tlck->mp;
		JFS_IP(tlck->ip)->xtlid = 0;

		if (mp) {
			mp->lid = 0;

			/*
			 * reset lsn of page to avoid logwrap:
			 *
			 * (page may have been previously committed by another
			 * transaction(s) but has not been paged, i.e.,
			 * it may be on logsync list even though it has not
			 * been logged for the current tx.)
			 */
			if (mp->xflag & COMMIT_PAGE && mp->lsn)
				LogSyncRelease(mp);
		}
		/* insert tlock at head of freelist */
		TXN_LOCK();
		txLockFree(lid);
		TXN_UNLOCK();
	}

	/* caller will free the transaction block */

	tblk->next = tblk->last = 0;

	/*
	 * mark filesystem dirty
	 */
	if (dirty)
		jfs_error(tblk->sb, "\n");

	return;
}

/*
 *	txLazyCommit(void)
 *
 *	All transactions except those changing ipimap (COMMIT_FORCE) are
 *	processed by this routine.  This insures that the inode and block
 *	allocation maps are updated in order.  For synchronous transactions,
 *	let the user thread finish processing after txUpdateMap() is called.
*/
static void txLazyCommit(struct tblock * tblk)
{
	struct jfs_log *log;

	/* yield until the tblk is flagged tblkGC_READY or tblkGC_UNLOCKED */
	while (((tblk->flag & tblkGC_READY) == 0) &&
	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
		/* We must have gotten ahead of the user thread
		 */
		jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk);
		yield();
	}

	jfs_info("txLazyCommit: processing tblk 0x%p", tblk);

	txUpdateMap(tblk);

	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;

	spin_lock_irq(&log->gclock);	// LOGGC_LOCK

	tblk->flag |= tblkGC_COMMITTED;

	if (tblk->flag & tblkGC_READY)
		log->gcrtc--;

	wake_up_all(&tblk->gcwait);	// LOGGC_WAKEUP

	/*
	 * Can't release log->gclock until we've tested tblk->flag
	 */
	if (tblk->flag & tblkGC_LAZY) {
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
		/* lazy transaction: this thread unlocks and ends it */
		txUnlock(tblk);
		tblk->flag &= ~tblkGC_LAZY;
		txEnd(tblk - TxBlock);	/* Convert back to tid */
	} else
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK

	jfs_info("txLazyCommit: done: tblk = 0x%p", tblk);
}

/*
 *	jfs_lazycommit(void)
 *
 *	To be run as a kernel daemon.  If lbmIODone is called in an interrupt
 *	context, or where blocking is not wanted, this routine will process
 *	committed transactions from the unlock queue.
 *
 *	Loops until kthread_should_stop(); always returns 0.
 */
int jfs_lazycommit(void *arg)
{
	int WorkDone;
	struct tblock *tblk;
	unsigned long flags;
	struct jfs_sb_info *sbi;

	set_freezable();
	do {
		LAZY_LOCK(flags);
		jfs_commit_thread_waking = 0;	/* OK to wake another thread */
		while (!list_empty(&TxAnchor.unlock_queue)) {
			WorkDone = 0;
			list_for_each_entry(tblk, &TxAnchor.unlock_queue,
					    cqueue) {

				sbi = JFS_SBI(tblk->sb);
				/*
				 * For each volume, the transactions must be
				 * handled in order.  If another commit thread
				 * is handling a tblk for this superblock,
				 * skip it
				 */
				if (sbi->commit_state & IN_LAZYCOMMIT)
					continue;

				sbi->commit_state |= IN_LAZYCOMMIT;
				WorkDone = 1;

				/*
				 * Remove transaction from queue
				 */
				list_del(&tblk->cqueue);

				/* the lazy lock is dropped around the commit */
				LAZY_UNLOCK(flags);
				txLazyCommit(tblk);
				LAZY_LOCK(flags);

				sbi->commit_state &= ~IN_LAZYCOMMIT;
				/*
				 * Don't continue in the for loop.  (We can't
				 * anyway, it's unsafe!)  We want to go back to
				 * the beginning of the list.
				 */
				break;
			}

			/* If there was nothing to do, don't continue */
			if (!WorkDone)
				break;
		}
		/* In case a wakeup came while all threads were active */
		jfs_commit_thread_waking = 0;

		if (freezing(current)) {
			LAZY_UNLOCK(flags);
			try_to_freeze();
		} else {
			DECLARE_WAITQUEUE(wq, current);

			/*
			 * Queue on the wait queue and mark ourselves
			 * interruptible before dropping the lazy lock.
			 */
			add_wait_queue(&jfs_commit_thread_wait, &wq);
			set_current_state(TASK_INTERRUPTIBLE);
			LAZY_UNLOCK(flags);
			schedule();
			remove_wait_queue(&jfs_commit_thread_wait, &wq);
		}
	} while (!kthread_should_stop());

	if (!list_empty(&TxAnchor.unlock_queue))
		jfs_err("jfs_lazycommit being killed w/pending transactions!");
	else
		jfs_info("jfs_lazycommit being killed");
	return 0;
}

/*
 *	txLazyUnlock()
 *
 *	Append tblk to TxAnchor.unlock_queue and, if appropriate, wake a
 *	waiter on jfs_commit_thread_wait.  Runs under LAZY_LOCK().
 */
void txLazyUnlock(struct tblock * tblk)
{
	unsigned long flags;

	LAZY_LOCK(flags);

	list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue);
	/*
	 * Don't wake up a commit thread if there is already one servicing
	 * this superblock, or if the last one we woke up hasn't started yet.
	 */
	if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) &&
	    !jfs_commit_thread_waking) {
		jfs_commit_thread_waking = 1;
		wake_up(&jfs_commit_thread_wait);
	}
	LAZY_UNLOCK(flags);
}

/*
 *	LogSyncRelease()
 *
 *	Asserts that mp->nohomeok is set and mp->log is non-NULL, then calls
 *	metapage_homeok() on the page.
 */
static void LogSyncRelease(struct metapage * mp)
{
	struct jfs_log *log = mp->log;

	assert(mp->nohomeok);
	assert(log);
	metapage_homeok(mp);
}

/*
 *	txQuiesce
 *
 *	Block all new transactions and push anonymous transactions to
 *	completion
 *
 *	This does almost the same thing as jfs_sync below.  We don't
 *	worry about deadlocking when jfs_tlocks_low is set, since we would
 *	expect jfs_sync to get us out of that jam.
*/
void txQuiesce(struct super_block *sb)
{
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	struct jfs_log *log = JFS_SBI(sb)->log;
	tid_t tid;

	set_bit(log_QUIESCE, &log->flag);

	TXN_LOCK();
restart:
	while (!list_empty(&TxAnchor.anon_list)) {
		/* always take the first inode on the anonymous list */
		jfs_ip = list_entry(TxAnchor.anon_list.next,
				    struct jfs_inode_info,
				    anon_inode_list);
		ip = &jfs_ip->vfs_inode;

		/*
		 * inode will be removed from anonymous list
		 * when it is committed
		 */
		TXN_UNLOCK();
		tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
		mutex_lock(&jfs_ip->commit_mutex);
		txCommit(tid, 1, &ip, 0);
		txEnd(tid);
		mutex_unlock(&jfs_ip->commit_mutex);
		/*
		 * Just to be safe.  I don't know how
		 * long we can run without blocking
		 */
		cond_resched();
		TXN_LOCK();
	}

	/*
	 * If jfs_sync is running in parallel, there could be some inodes
	 * on anon_list2.  Let's check.
	 */
	if (!list_empty(&TxAnchor.anon_list2)) {
		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
		goto restart;
	}
	TXN_UNLOCK();

	/*
	 * We may need to kick off the group commit
	 */
	jfs_flush_journal(log, 0);
}

/*
 * txResume()
 *
 * Allows transactions to start again following txQuiesce
 *
 * Clears log_QUIESCE in the log flags and wakes waiters on log->syncwait.
 */
void txResume(struct super_block *sb)
{
	struct jfs_log *log = JFS_SBI(sb)->log;

	clear_bit(log_QUIESCE, &log->flag);
	TXN_WAKEUP(&log->syncwait);
}

/*
 *	jfs_sync(void)
 *
 *	To be run as a kernel daemon.  This is awakened when tlocks run low.
 *	We write any inodes that have anonymous tlocks so they will become
 *	available.
 *
 *	Loops until kthread_should_stop(); always returns 0.
 */
int jfs_sync(void *arg)
{
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	tid_t tid;

	set_freezable();
	do {
		/*
		 * write each inode on the anonymous inode list
		 */
		TXN_LOCK();
		while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) {
			jfs_ip = list_entry(TxAnchor.anon_list.next,
					    struct jfs_inode_info,
					    anon_inode_list);
			ip = &jfs_ip->vfs_inode;

			if (! igrab(ip)) {
				/*
				 * Inode is being freed
				 */
				list_del_init(&jfs_ip->anon_inode_list);
			} else if (mutex_trylock(&jfs_ip->commit_mutex)) {
				/*
				 * inode will be removed from anonymous list
				 * when it is committed
				 */
				TXN_UNLOCK();
				tid = txBegin(ip->i_sb, COMMIT_INODE);
				txCommit(tid, 1, &ip, 0);
				txEnd(tid);
				mutex_unlock(&jfs_ip->commit_mutex);

				/* drop the reference taken by igrab() above */
				iput(ip);
				/*
				 * Just to be safe.  I don't know how
				 * long we can run without blocking
				 */
				cond_resched();
				TXN_LOCK();
			} else {
				/* We can't get the commit mutex.  It may
				 * be held by a thread waiting for tlock's
				 * so let's not block here.  Save it to
				 * put back on the anon_list.
				 */

				/* Move from anon_list to anon_list2 */
				list_move(&jfs_ip->anon_inode_list,
					  &TxAnchor.anon_list2);

				/* iput() is called with the TXN lock dropped */
				TXN_UNLOCK();
				iput(ip);
				TXN_LOCK();
			}
		}
		/* Add anon_list2 back to anon_list */
		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);

		if (freezing(current)) {
			TXN_UNLOCK();
			try_to_freeze();
		} else {
			/* go to sleep; state is set before the lock is dropped */
			set_current_state(TASK_INTERRUPTIBLE);
			TXN_UNLOCK();
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfs_sync being killed");
	return 0;
}

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
/*
 * jfs_txanchor_proc_show()
 *
 * seq_file show routine: prints selected TxAnchor fields, the state of its
 * wait queues, jfs_tlocks_low, and whether unlock_queue is empty.
 * Always returns 0.
 */
int jfs_txanchor_proc_show(struct seq_file *m, void *v)
{
	char *freewait;
	char *freelockwait;
	char *lowlockwait;

	freewait =
	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
	freelockwait =
	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
	lowlockwait =
	    waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty";

	seq_printf(m,
		       "JFS TxAnchor\n"
		       "============\n"
		       "freetid = %d\n"
		       "freewait = %s\n"
		       "freelock = %d\n"
		       "freelockwait = %s\n"
		       "lowlockwait = %s\n"
		       "tlocksInUse = %d\n"
		       "jfs_tlocks_low = %d\n"
		       "unlock_queue is %sempty\n",
		       TxAnchor.freetid,
		       freewait,
		       TxAnchor.freelock,
		       freelockwait,
		       lowlockwait,
		       TxAnchor.tlocksInUse,
		       jfs_tlocks_low,
		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
	return 0;
}
#endif

#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
/*
 * jfs_txstats_proc_show()
 *
 * seq_file show routine: prints the TxStat counters.  Always returns 0.
 */
int jfs_txstats_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		       "JFS TxStats\n"
		       "===========\n"
		       "calls to txBegin = %d\n"
		       "txBegin blocked by sync barrier = %d\n"
		       "txBegin blocked by tlocks low = %d\n"
		       "txBegin blocked by no free tid = %d\n"
		       "calls to txBeginAnon = %d\n"
		       "txBeginAnon blocked by sync barrier = %d\n"
		       "txBeginAnon blocked by tlocks low = %d\n"
		       "calls to txLockAlloc = %d\n"
		       "tLockAlloc blocked by no free lock = %d\n",
		       TxStat.txBegin,
		       TxStat.txBegin_barrier,
		       TxStat.txBegin_lockslow,
		       TxStat.txBegin_freetid,
		       TxStat.txBeginAnon,
		       TxStat.txBeginAnon_barrier,
		       TxStat.txBeginAnon_lockslow,
		       TxStat.txLockAlloc,
		       TxStat.txLockAlloc_freelock);
	return 0;
}
#endif
33 33 16 16 22 22 19 3 4 22 7 17 22 3 18 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2014 Nicira, Inc. */ #include <linux/etherdevice.h> #include <linux/if.h> #include <linux/if_vlan.h> #include <linux/jhash.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/compat.h> #include <net/net_namespace.h> #include <linux/module.h> #include "datapath.h" #include "vport.h" #include "vport-internal_dev.h" static LIST_HEAD(vport_ops_list); /* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 /** * ovs_vport_init - initialize vport subsystem * * Called at module load time to initialize the vport subsystem. */ int ovs_vport_init(void) { dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head), GFP_KERNEL); if (!dev_table) return -ENOMEM; return 0; } /** * ovs_vport_exit - shutdown vport subsystem * * Called at module exit time to shutdown the vport subsystem. 
*/ void ovs_vport_exit(void) { kfree(dev_table); } static struct hlist_head *hash_bucket(const struct net *net, const char *name) { unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } int __ovs_vport_ops_register(struct vport_ops *ops) { int err = -EEXIST; struct vport_ops *o; ovs_lock(); list_for_each_entry(o, &vport_ops_list, list) if (ops->type == o->type) goto errout; list_add_tail(&ops->list, &vport_ops_list); err = 0; errout: ovs_unlock(); return err; } EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); void ovs_vport_ops_unregister(struct vport_ops *ops) { ovs_lock(); list_del(&ops->list); ovs_unlock(); } EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister); /** * ovs_vport_locate - find a port that has already been created * * @net: network namespace * @name: name of port to find * * Must be called with ovs or RCU read lock. */ struct vport *ovs_vport_locate(const struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; hlist_for_each_entry_rcu(vport, bucket, hash_node, lockdep_ovsl_is_held()) if (!strcmp(name, ovs_vport_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; return NULL; } /** * ovs_vport_alloc - allocate and initialize new vport * * @priv_size: Size of private data area to allocate. * @ops: vport device ops * @parms: information about new vport. * * Allocate and initialize a new vport defined by @ops. The vport will contain * a private data area of size @priv_size that can be accessed using * vport_priv(). Some parameters of the vport will be initialized from @parms. * @vports that are no longer needed should be released with * vport_free(). 
*/ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, const struct vport_parms *parms) { struct vport *vport; size_t alloc_size; int err; alloc_size = sizeof(struct vport); if (priv_size) { alloc_size = ALIGN(alloc_size, VPORT_ALIGN); alloc_size += priv_size; } vport = kzalloc(alloc_size, GFP_KERNEL); if (!vport) return ERR_PTR(-ENOMEM); vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); if (!vport->upcall_stats) { err = -ENOMEM; goto err_kfree_vport; } vport->dp = parms->dp; vport->port_no = parms->port_no; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { err = -EINVAL; goto err_free_percpu; } return vport; err_free_percpu: free_percpu(vport->upcall_stats); err_kfree_vport: kfree(vport); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ovs_vport_alloc); /** * ovs_vport_free - uninitialize and free vport * * @vport: vport to free * * Frees a vport allocated with vport_alloc() when it is no longer needed. * * The caller must ensure that an RCU grace period has passed since the last * time @vport was in a datapath. */ void ovs_vport_free(struct vport *vport) { /* vport is freed from RCU callback or error path, Therefore * it is safe to use raw dereference. */ kfree(rcu_dereference_raw(vport->upcall_portids)); free_percpu(vport->upcall_stats); kfree(vport); } EXPORT_SYMBOL_GPL(ovs_vport_free); static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms) { struct vport_ops *ops; list_for_each_entry(ops, &vport_ops_list, list) if (ops->type == parms->type) return ops; return NULL; } /** * ovs_vport_add - add vport device (for kernel callers) * * @parms: Information about new vport. * * Creates a new vport with the specified configuration (which is dependent on * device type). ovs_mutex must be held. 
*/ struct vport *ovs_vport_add(const struct vport_parms *parms) { struct vport_ops *ops; struct vport *vport; ops = ovs_vport_lookup(parms); if (ops) { struct hlist_head *bucket; if (!try_module_get(ops->owner)) return ERR_PTR(-EAFNOSUPPORT); vport = ops->create(parms); if (IS_ERR(vport)) { module_put(ops->owner); return vport; } bucket = hash_bucket(ovs_dp_get_net(vport->dp), ovs_vport_name(vport)); hlist_add_head_rcu(&vport->hash_node, bucket); return vport; } /* Unlock to attempt module load and return -EAGAIN if load * was successful as we need to restart the port addition * workflow. */ ovs_unlock(); request_module("vport-type-%d", parms->type); ovs_lock(); if (!ovs_vport_lookup(parms)) return ERR_PTR(-EAFNOSUPPORT); else return ERR_PTR(-EAGAIN); } /** * ovs_vport_set_options - modify existing vport device (for kernel callers) * * @vport: vport to modify. * @options: New configuration. * * Modifies an existing device with the specified configuration (which is * dependent on device type). ovs_mutex must be held. */ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) { if (!vport->ops->set_options) return -EOPNOTSUPP; return vport->ops->set_options(vport, options); } /** * ovs_vport_del - delete existing vport device * * @vport: vport to delete. * * Detaches @vport from its datapath and destroys it. ovs_mutex must * be held. */ void ovs_vport_del(struct vport *vport) { hlist_del_rcu(&vport->hash_node); module_put(vport->ops->owner); vport->ops->destroy(vport); } /** * ovs_vport_get_stats - retrieve device stats * * @vport: vport from which to retrieve the stats * @stats: location to store stats * * Retrieves transmit, receive, and error stats for the given device. * * Must be called with ovs_mutex or rcu_read_lock. 
*/ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) { const struct rtnl_link_stats64 *dev_stats; struct rtnl_link_stats64 temp; dev_stats = dev_get_stats(vport->dev, &temp); stats->rx_errors = dev_stats->rx_errors; stats->tx_errors = dev_stats->tx_errors; stats->tx_dropped = dev_stats->tx_dropped; stats->rx_dropped = dev_stats->rx_dropped; stats->rx_bytes = dev_stats->rx_bytes; stats->rx_packets = dev_stats->rx_packets; stats->tx_bytes = dev_stats->tx_bytes; stats->tx_packets = dev_stats->tx_packets; } /** * ovs_vport_get_upcall_stats - retrieve upcall stats * * @vport: vport from which to retrieve the stats. * @skb: sk_buff where upcall stats should be appended. * * Retrieves upcall stats for the given device. * * Must be called with ovs_mutex or rcu_read_lock. */ int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; int i; __u64 tx_success = 0; __u64 tx_fail = 0; for_each_possible_cpu(i) { const struct vport_upcall_stats_percpu *stats; unsigned int start; stats = per_cpu_ptr(vport->upcall_stats, i); do { start = u64_stats_fetch_begin(&stats->syncp); tx_success += u64_stats_read(&stats->n_success); tx_fail += u64_stats_read(&stats->n_fail); } while (u64_stats_fetch_retry(&stats->syncp, start)); } nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS); if (!nla) return -EMSGSIZE; if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success, OVS_VPORT_ATTR_PAD)) { nla_nest_cancel(skb, nla); return -EMSGSIZE; } if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail, OVS_VPORT_ATTR_PAD)) { nla_nest_cancel(skb, nla); return -EMSGSIZE; } nla_nest_end(skb, nla); return 0; } /** * ovs_vport_get_options - retrieve device options * * @vport: vport from which to retrieve the options. * @skb: sk_buff where options should be appended. 
* * Retrieves the configuration of the given device, appending an * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested * vport-specific attributes to @skb. * * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another * negative error code if a real error occurred. If an error occurs, @skb is * left unmodified. * * Must be called with ovs_mutex or rcu_read_lock. */ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; int err; if (!vport->ops->get_options) return 0; nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; err = vport->ops->get_options(vport, skb); if (err) { nla_nest_cancel(skb, nla); return err; } nla_nest_end(skb, nla); return 0; } /** * ovs_vport_set_upcall_portids - set upcall portids of @vport. * * @vport: vport to modify. * @ids: new configuration, an array of port ids. * * Sets the vport's upcall_portids to @ids. * * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed * as an array of U32. * * Must be called with ovs_mutex. */ int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids) { struct vport_portids *old, *vport_portids; if (!nla_len(ids) || nla_len(ids) % sizeof(u32)) return -EINVAL; old = ovsl_dereference(vport->upcall_portids); vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids), GFP_KERNEL); if (!vport_portids) return -ENOMEM; vport_portids->n_ids = nla_len(ids) / sizeof(u32); vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids); nla_memcpy(vport_portids->ids, ids, nla_len(ids)); rcu_assign_pointer(vport->upcall_portids, vport_portids); if (old) kfree_rcu(old, rcu); return 0; } /** * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. * * @vport: vport from which to retrieve the portids. * @skb: sk_buff where portids should be appended. 
* * Retrieves the configuration of the given vport, appending the * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall * portids to @skb. * * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. * If an error occurs, @skb is left unmodified. Must be called with * ovs_mutex or rcu_read_lock. */ int ovs_vport_get_upcall_portids(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; ids = rcu_dereference_ovsl(vport->upcall_portids); if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS) return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->n_ids * sizeof(u32), (void *)ids->ids); else return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]); } /** * ovs_vport_find_upcall_portid - find the upcall portid to send upcall. * * @vport: vport from which the missed packet is received. * @skb: skb that the missed packet was received. * * Uses the skb_get_hash() to select the upcall portid to send the * upcall. * * Returns the portid of the target socket. Must be called with rcu_read_lock. */ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; u32 ids_index; u32 hash; ids = rcu_dereference(vport->upcall_portids); /* If there is only one portid, select it in the fast-path. */ if (ids->n_ids == 1) return ids->ids[0]; hash = skb_get_hash(skb); ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); return ids->ids[ids_index]; } /** * ovs_vport_receive - pass up received packet to the datapath for processing * * @vport: vport that received the packet * @skb: skb that was received * @tun_info: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. 
*/ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, const struct ip_tunnel_info *tun_info) { struct sw_flow_key key; int error; OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; mark = skb->mark; skb_scrub_packet(skb, true); skb->mark = mark; tun_info = NULL; } /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_key_extract(tun_info, skb, &key); if (unlikely(error)) { kfree_skb(skb); return error; } ovs_dp_process_packet(skb, &key); return 0; } static int packet_length(const struct sk_buff *skb, struct net_device *dev) { int length = skb->len - dev->hard_header_len; if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol)) length -= VLAN_HLEN; /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none * account for 802.1ad. e.g. is_skb_forwardable(). */ return length > 0 ? length : 0; } void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) { int mtu = vport->dev->mtu; switch (vport->dev->type) { case ARPHRD_NONE: if (mac_proto == MAC_PROTO_ETHERNET) { skb_reset_network_header(skb); skb_reset_mac_len(skb); skb->protocol = htons(ETH_P_TEB); } else if (mac_proto != MAC_PROTO_NONE) { WARN_ON_ONCE(1); goto drop; } break; case ARPHRD_ETHER: if (mac_proto != MAC_PROTO_ETHERNET) goto drop; break; default: goto drop; } if (unlikely(packet_length(skb, vport->dev) > mtu && !skb_is_gso(skb))) { vport->dev->stats.tx_errors++; if (vport->dev->flags & IFF_UP) net_warn_ratelimited("%s: dropped over-mtu packet: " "%d > %d\n", vport->dev->name, packet_length(skb, vport->dev), mtu); goto drop; } skb->dev = vport->dev; skb_clear_tstamp(skb); vport->ops->send(skb); return; drop: kfree_skb(skb); }
5 5 15 17 2 3 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 
1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 // SPDX-License-Identifier: GPL-2.0-only /* * AT and PS/2 keyboard driver * * Copyright (c) 1999-2002 Vojtech Pavlik */ /* * This driver can handle standard AT keyboards and PS/2 keyboards in * Translated and Raw Set 2 and Set 3, as well as AT keyboards on dumb * input-only controllers and AT keyboards connected over a one way RS232 * converter. 
*/

#include <linux/delay.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/input.h>
#include <linux/input/vivaldi-fmap.h>
#include <linux/serio.h>
#include <linux/workqueue.h>
#include <linux/libps2.h>
#include <linux/mutex.h>
#include <linux/dmi.h>
#include <linux/property.h>

#define DRIVER_DESC	"AT and PS/2 keyboard driver"

MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");

/*
 * Module parameters. All of them are registered with permission 0 and
 * are copied into (or consulted for) the per-keyboard state when a
 * keyboard is connected.
 */

/* Scancode set to ask for; passed to atkbd_select_set() as the target. */
static int atkbd_set = 2;
module_param_named(set, atkbd_set, int, 0);
MODULE_PARM_DESC(set, "Select keyboard code set (2 = default, 3 = PS/2 native)");

/*
 * Whether atkbd_probe() issues a full keyboard reset. The default is
 * off on the architectures named in the condition below and on
 * everywhere else.
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__hppa__) || defined(__loongarch__)
static bool atkbd_reset;
#else
static bool atkbd_reset = true;
#endif
module_param_named(reset, atkbd_reset, bool, 0);
MODULE_PARM_DESC(reset, "Reset keyboard during initialization");

/* Enabling software repeat also forces softraw on at connect time. */
static bool atkbd_softrepeat;
module_param_named(softrepeat, atkbd_softrepeat, bool, 0);
MODULE_PARM_DESC(softrepeat, "Use software keyboard repeat");

static bool atkbd_softraw = true;
module_param_named(softraw, atkbd_softraw, bool, 0);
MODULE_PARM_DESC(softraw, "Use software generated rawmode");

static bool atkbd_scroll;
module_param_named(scroll, atkbd_scroll, bool, 0);
MODULE_PARM_DESC(scroll, "Enable scroll-wheel on MS Office and similar keyboards");

static bool atkbd_extra;
module_param_named(extra, atkbd_extra, bool, 0);
MODULE_PARM_DESC(extra, "Enable extra LEDs and keys on IBM RapidAcces, EzKey and similar keyboards");

static bool atkbd_terminal;
module_param_named(terminal, atkbd_terminal, bool, 0);
MODULE_PARM_DESC(terminal, "Enable break codes on an IBM Terminal keyboard connected via AT/PS2");

/*
 * Split one packed 32-bit keymap entry: the upper 16 bits carry the
 * scancode, the lower 16 bits carry the keycode.
 */
#define SCANCODE(keymap)	((keymap >> 16) & 0xFFFF)
#define KEYCODE(keymap)		(keymap & 0xFFFF)

/*
 * Scancode to keycode tables. These are just the default setting, and
 * are loadable via a userland utility.
*/

/* Number of entries in every scancode -> keycode table of this driver. */
#define ATKBD_KEYMAP_SIZE	512

/* Default keycodes for scancode set 2, indexed by the driver's compat scancode. */
static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = {

#ifdef CONFIG_KEYBOARD_ATKBD_HP_KEYCODES

/* XXX: need a more general approach */

#include "hpps2atkbd.h"	/* include the keyboard scancodes */

#else
	  0, 67, 65, 63, 61, 59, 60, 88,  0, 68, 66, 64, 62, 15, 41,117,
	  0, 56, 42, 93, 29, 16,  2,  0,  0,  0, 44, 31, 30, 17,  3,  0,
	  0, 46, 45, 32, 18,  5,  4, 95,  0, 57, 47, 33, 20, 19,  6,183,
	  0, 49, 48, 35, 34, 21,  7,184,  0,  0, 50, 36, 22,  8,  9,185,
	  0, 51, 37, 23, 24, 11, 10,  0,  0, 52, 53, 38, 39, 25, 12,  0,
	  0, 89, 40,  0, 26, 13,  0,193, 58, 54, 28, 27,  0, 43,  0, 85,
	  0, 86, 91, 90, 92,  0, 14, 94,  0, 79,124, 75, 71,121,  0,  0,
	 82, 83, 80, 76, 77, 72,  1, 69, 87, 78, 81, 74, 55, 73, 70, 99,

	  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	217,100,255,  0, 97,165,  0,  0,156,  0,  0,  0,  0,  0,  0,125,
	173,114,  0,113,  0,  0,  0,126,128,  0,  0,140,  0,  0,  0,127,
	159,  0,115,  0,164,  0,  0,116,158,  0,172,166,  0,  0,  0,142,
	157,  0,  0,  0,  0,  0,  0,  0,155,  0, 98,  0,  0,163,  0,  0,
	226,  0,  0,  0,  0,  0,  0,  0,  0,255, 96,  0,  0,  0,143,  0,
	  0,  0,  0,  0,  0,  0,  0,  0,  0,107,  0,105,102,  0,  0,112,
	110,111,108,112,106,103,  0,119,  0,118,109,  0, 99,104,119,  0,

	  0,  0,  0, 65, 99,
#endif
};

/* Default keycodes for scancode set 3; entries not listed are zero. */
static const unsigned short atkbd_set3_keycode[ATKBD_KEYMAP_SIZE] = {

	  0,  0,  0,  0,  0,  0,  0, 59,  1,138,128,129,130, 15, 41, 60,
	131, 29, 42, 86, 58, 16,  2, 61,133, 56, 44, 31, 30, 17,  3, 62,
	134, 46, 45, 32, 18,  5,  4, 63,135, 57, 47, 33, 20, 19,  6, 64,
	136, 49, 48, 35, 34, 21,  7, 65,137,100, 50, 36, 22,  8,  9, 66,
	125, 51, 37, 23, 24, 11, 10, 67,126, 52, 53, 38, 39, 25, 12, 68,
	113,114, 40, 43, 26, 13, 87, 99, 97, 54, 28, 27, 43, 43, 88, 70,
	108,105,119,103,111,107, 14,110,  0, 79,106, 75, 71,109,102,104,
	 82, 83, 80, 76, 77, 72, 69, 98,  0, 96, 81,  0, 78, 73, 55,183,

	184,185,186,187, 74, 94, 92, 93,  0,  0,  0,125,126,127,112,  0,
	  0,139,172,163,165,115,152,172,166,140,160,154,113,114,167,168,
	148,149,147,140
};

/*
 * Maps a 7-bit translated code back to the set 2 scancode it came from;
 * used by atkbd_set_keycode_table() to build the keymap for translated
 * ports.
 */
static const unsigned short atkbd_unxlate_table[128] = {
          0,118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85,102, 13,
         21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27,
         35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42,
         50, 49, 58, 65, 73, 74, 89,124, 17, 41, 88,  5,  6,  4, 12,  3,
         11,  2, 10,  1,  9,119,126,108,117,125,123,107,115,116,121,105,
        114,122,112,113,127, 96, 97,120,  7, 15, 23, 31, 39, 47, 55, 63,
         71, 79, 86, 94,  8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87,111,
         19, 25, 57, 81, 83, 92, 95, 98, 99,100,101,103,104,106,109,110
};

/* Command words handed to ps2_command(). */
#define ATKBD_CMD_SETLEDS	0x10ed
#define ATKBD_CMD_GSCANSET	0x11f0
#define ATKBD_CMD_SSCANSET	0x10f0
#define ATKBD_CMD_GETID		0x02f2
#define ATKBD_CMD_SETREP	0x10f3
#define ATKBD_CMD_ENABLE	0x00f4
#define ATKBD_CMD_RESET_DIS	0x00f5	/* Reset to defaults and disable */
#define ATKBD_CMD_RESET_DEF	0x00f6	/* Reset to defaults */
#define ATKBD_CMD_SETALL_MB	0x00f8	/* Set all keys to give break codes */
#define ATKBD_CMD_SETALL_MBR	0x00fa  /* ... and repeat */
#define ATKBD_CMD_RESET_BAT	0x02ff
#define ATKBD_CMD_RESEND	0x00fe
#define ATKBD_CMD_EX_ENABLE	0x10ea
#define ATKBD_CMD_EX_SETLEDS	0x20eb
#define ATKBD_CMD_OK_GETID	0x02e8

/* Byte values from the keyboard that the receive path treats specially. */
#define ATKBD_RET_ACK		0xfa
#define ATKBD_RET_NAK		0xfe
#define ATKBD_RET_BAT		0xaa
#define ATKBD_RET_EMUL0		0xe0
#define ATKBD_RET_EMUL1		0xe1
#define ATKBD_RET_RELEASE	0xf0
#define ATKBD_RET_HANJA		0xf1
#define ATKBD_RET_HANGEUL	0xf2
#define ATKBD_RET_ERR		0xff

/* Keymap sentinels: UNKNOWN triggers a warning, NULL is silently ignored. */
#define ATKBD_KEY_UNKNOWN	0
#define ATKBD_KEY_NULL		255

/*
 * Pseudo keycodes for the scroll keys. Every keymap value at or above
 * ATKBD_SPECIAL is handled inside the driver and is never advertised
 * as a key bit.
 */
#define ATKBD_SCR_1		0xfffe
#define ATKBD_SCR_2		0xfffd
#define ATKBD_SCR_4		0xfffc
#define ATKBD_SCR_8		0xfffb
#define ATKBD_SCR_CLICK		0xfffa
#define ATKBD_SCR_LEFT		0xfff9
#define ATKBD_SCR_RIGHT		0xfff8

#define ATKBD_SPECIAL		ATKBD_SCR_RIGHT

/* Bit numbers in atkbd->event_mask for work deferred to atkbd_event_work(). */
#define ATKBD_LED_EVENT_BIT	0
#define ATKBD_REP_EVENT_BIT	1

#define ATKBD_XL_ERR		0x01
#define ATKBD_XL_BAT		0x02
#define ATKBD_XL_ACK		0x04
#define ATKBD_XL_NAK		0x08
#define ATKBD_XL_HANGEUL	0x10
#define ATKBD_XL_HANJA		0x20

/* Set 2 scancodes that are remapped to scroll pseudo keycodes when scroll is on. */
static const struct {
	unsigned short keycode;
	unsigned char set2;
} atkbd_scroll_keys[] = {
	{ ATKBD_SCR_1,     0xc5 },
	{ ATKBD_SCR_2,     0x9d },
	{ ATKBD_SCR_4,     0xa4 },
	{ ATKBD_SCR_8,     0x9b },
	{ ATKBD_SCR_CLICK, 0xe0 },
	{ ATKBD_SCR_LEFT,  0xcb },
	{ ATKBD_SCR_RIGHT, 0xd2 },
};

/*
 * The atkbd control structure
 */

struct atkbd {

	/* Embedded PS/2 state; container_of() on it yields the struct atkbd. */
	struct ps2dev ps2dev;
	struct input_dev *dev;

	/* Written only during init */
	char name[64];
	char phys[32];

	/* Keyboard ID: read with GETID or one of the driver's fixed fallbacks. */
	unsigned short id;
	/* Active scancode -> keycode table. */
	unsigned short keycode[ATKBD_KEYMAP_SIZE];
	/* Scancodes for which the driver generates the release itself. */
	DECLARE_BITMAP(force_release_mask, ATKBD_KEYMAP_SIZE);
	/* Scancode set in use (2 or 3). */
	unsigned char set;
	/* Port type is SERIO_8042_XL. */
	bool translated;
	/* ATKBD_CMD_EX_ENABLE succeeded. */
	bool extra;
	/* The port has a write method, so commands can be sent. */
	bool write;
	bool softrepeat;
	bool softraw;
	bool scroll;
	/* When false the receive path drops incoming bytes. */
	bool enabled;

	/* Accessed only from interrupt */
	/* Prefix state: 1 after ATKBD_RET_EMUL0, 2 after ATKBD_RET_EMUL1. */
	unsigned char emul;
	/* A resend request is outstanding. */
	bool resend;
	/* The scancode being assembled is a key release. */
	bool release;
	unsigned long xl_bit;
	/* Last pressed scancode and deadline, used by the repeat workaround. */
	unsigned int last;
	unsigned long time;
	/* Number of ATKBD_RET_ERR bytes received. */
	unsigned long err_count;

	struct delayed_work event_work;
	/* Time of the last scheduling request, used for throttling. */
	unsigned long event_jiffies;
	unsigned long event_mask;

	/* Serializes reconnect(), attr->set() and event work */
	struct mutex mutex;

	struct vivaldi_data vdata;
};

/*
 * System-specific keymap fixup routine
 */
static void (*atkbd_platform_fixup)(struct atkbd *, const void *data);
static void *atkbd_platform_fixup_data;
/* Optional hook that rewrites a raw scancode before it is decoded. */
static unsigned int (*atkbd_platform_scancode_fixup)(struct atkbd *,
						     unsigned int);

/*
 * Certain keyboards to not like ATKBD_CMD_RESET_DIS and stop responding
 * to many commands until full reset (ATKBD_CMD_RESET_BAT) is performed.
*/
/* When true, atkbd_probe() does not call atkbd_deactivate(). */
static bool atkbd_skip_deactivate;

/* Common show/store trampolines; the definitions appear later in the file. */
static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf,
				ssize_t (*handler)(struct atkbd *, char *));
static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count,
				ssize_t (*handler)(struct atkbd *, const char *, size_t));

/*
 * Declares a read/write sysfs attribute called _name. The macro emits
 * prototypes for atkbd_show_<name>() and atkbd_set_<name>(), thin
 * device_attribute callbacks that route through the two helpers above,
 * and the atkbd_attr_<name> object itself (writable by the owner,
 * readable by everyone).
 */
#define ATKBD_DEFINE_ATTR(_name)						\
static ssize_t atkbd_show_##_name(struct atkbd *, char *);			\
static ssize_t atkbd_set_##_name(struct atkbd *, const char *, size_t);		\
static ssize_t atkbd_do_show_##_name(struct device *d,				\
				struct device_attribute *attr, char *b)		\
{										\
	return atkbd_attr_show_helper(d, b, atkbd_show_##_name);		\
}										\
static ssize_t atkbd_do_set_##_name(struct device *d,				\
			struct device_attribute *attr, const char *b, size_t s)	\
{										\
	return atkbd_attr_set_helper(d, b, s, atkbd_set_##_name);		\
}										\
static struct device_attribute atkbd_attr_##_name =				\
	__ATTR(_name, S_IWUSR | S_IRUGO, atkbd_do_show_##_name, atkbd_do_set_##_name);

ATKBD_DEFINE_ATTR(extra);
ATKBD_DEFINE_ATTR(force_release);
ATKBD_DEFINE_ATTR(scroll);
ATKBD_DEFINE_ATTR(set);
ATKBD_DEFINE_ATTR(softrepeat);
ATKBD_DEFINE_ATTR(softraw);

/*
 * Read-only variant of ATKBD_DEFINE_ATTR: only the show side is
 * generated and the attribute has no store callback.
 */
#define ATKBD_DEFINE_RO_ATTR(_name)						\
static ssize_t atkbd_show_##_name(struct atkbd *, char *);			\
static ssize_t atkbd_do_show_##_name(struct device *d,				\
				struct device_attribute *attr, char *b)		\
{										\
	return atkbd_attr_show_helper(d, b, atkbd_show_##_name);		\
}										\
static struct device_attribute atkbd_attr_##_name =				\
	__ATTR(_name, S_IRUGO, atkbd_do_show_##_name, NULL);

ATKBD_DEFINE_RO_ATTR(err_count);
ATKBD_DEFINE_RO_ATTR(function_row_physmap);

/* NULL-terminated list of every attribute defined above. */
static struct attribute *atkbd_attributes[] = {
	&atkbd_attr_extra.attr,
	&atkbd_attr_force_release.attr,
	&atkbd_attr_scroll.attr,
	&atkbd_attr_set.attr,
	&atkbd_attr_softrepeat.attr,
	&atkbd_attr_softraw.attr,
	&atkbd_attr_err_count.attr,
	&atkbd_attr_function_row_physmap.attr,
	NULL
};

/* Show handler for function_row_physmap; formatting is done by the vivaldi helper. */
static ssize_t atkbd_show_function_row_physmap(struct atkbd *atkbd, char *buf)
{
	return vivaldi_function_row_physmap_show(&atkbd->vdata, buf);
}
static struct atkbd *atkbd_from_serio(struct serio *serio) { struct ps2dev *ps2dev = serio_get_drvdata(serio); return container_of(ps2dev, struct atkbd, ps2dev); } static umode_t atkbd_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); if (attr == &atkbd_attr_function_row_physmap.attr && !atkbd->vdata.num_function_row_keys) return 0; return attr->mode; } static const struct attribute_group atkbd_attribute_group = { .attrs = atkbd_attributes, .is_visible = atkbd_attr_is_visible, }; __ATTRIBUTE_GROUPS(atkbd_attribute); static const unsigned int xl_table[] = { ATKBD_RET_BAT, ATKBD_RET_ERR, ATKBD_RET_ACK, ATKBD_RET_NAK, ATKBD_RET_HANJA, ATKBD_RET_HANGEUL, }; /* * Checks if we should mangle the scancode to extract 'release' bit * in translated mode. */ static bool atkbd_need_xlate(unsigned long xl_bit, unsigned char code) { int i; if (code == ATKBD_RET_EMUL0 || code == ATKBD_RET_EMUL1) return false; for (i = 0; i < ARRAY_SIZE(xl_table); i++) if (code == xl_table[i]) return test_bit(i, &xl_bit); return true; } /* * Calculates new value of xl_bit so the driver can distinguish * between make/break pair of scancodes for select keys and PS/2 * protocol responses. 
*/
static void atkbd_calculate_xl_bit(struct atkbd *atkbd, unsigned char code)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(xl_table); i++) {
		/* Compare on the low seven bits only; bit 7 tells make from break. */
		if (!((code ^ xl_table[i]) & 0x7f)) {
			if (code & 0x80)
				__clear_bit(i, &atkbd->xl_bit);
			else
				__set_bit(i, &atkbd->xl_bit);
			break;
		}
	}
}

/*
 * Encode the scancode, 0xe0 prefix, and high bit into a single integer,
 * keeping kernel 2.4 compatibility for set 2
 *
 * For set 3 a pending 0xe0 prefix is recorded as bit 8. Otherwise bit 7
 * of the incoming code is moved up to bit 8 and a pending 0xe0 prefix
 * is recorded as bit 7.
 */
static unsigned int atkbd_compat_scancode(struct atkbd *atkbd, unsigned int code)
{
	if (atkbd->set == 3) {
		if (atkbd->emul == 1)
			code |= 0x100;
        } else {
		code = (code & 0x7f) | ((code & 0x80) << 1);
		if (atkbd->emul == 1)
			code |= 0x80;
	}

	return code;
}

/*
 * Tries to handle frame or parity error by requesting the keyboard controller
 * to resend the last byte. This is historically not done on x86 as controllers
 * there typically do not implement this command.
 *
 * Returns true when a resend was requested, in which case the caller
 * ignores the byte.
 */
static bool __maybe_unused atkbd_handle_frame_error(struct ps2dev *ps2dev,
						    u8 data, unsigned int flags)
{
	struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev);
	struct serio *serio = ps2dev->serio;

	/*
	 * Ask for a resend only on a frame/parity error without a timeout,
	 * only on writable ports, and only if no resend is pending already.
	 */
	if ((flags & (SERIO_FRAME | SERIO_PARITY)) &&
	    (~flags & SERIO_TIMEOUT) &&
	    !atkbd->resend && atkbd->write) {
		dev_warn(&serio->dev, "Frame/parity error: %02x\n", flags);
		serio_write(serio, ATKBD_CMD_RESEND);
		atkbd->resend = true;
		return true;
	}

	/* A clean ACK ends the pending resend. */
	if (!flags && data == ATKBD_RET_ACK)
		atkbd->resend = false;

	return false;
}

/*
 * First-stage receive hook registered with ps2_init(): logs the byte
 * and, except on 32- and 64-bit x86, gives the frame error handler a
 * chance to swallow it.
 */
static enum ps2_disposition atkbd_pre_receive_byte(struct ps2dev *ps2dev,
						   u8 data, unsigned int flags)
{
	struct serio *serio = ps2dev->serio;

	dev_dbg(&serio->dev, "Received %02x flags %02x\n", data, flags);

#if !defined(__i386__) && !defined (__x86_64__)
	if (atkbd_handle_frame_error(ps2dev, data, flags))
		return PS2_IGNORE;
#endif

	return PS2_PROCESS;
}

/*
 * Second-stage receive hook registered with ps2_init(): decodes one
 * byte from the keyboard and turns complete scancodes into input
 * events. State carried between bytes lives in atkbd->emul,
 * atkbd->release and atkbd->xl_bit.
 */
static void atkbd_receive_byte(struct ps2dev *ps2dev, u8 data)
{
	struct serio *serio = ps2dev->serio;
	struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev);
	struct input_dev *dev = atkbd->dev;
	unsigned int code = data;
	int scroll = 0, hscroll = 0, click = -1;
	int value;
	unsigned short keycode;

	pm_wakeup_event(&serio->dev, 0);

	if (!atkbd->enabled)
		return;

	input_event(dev, EV_MSC, MSC_RAW, code);

	if (atkbd_platform_scancode_fixup)
		code = atkbd_platform_scancode_fixup(atkbd, code);

	if (atkbd->translated) {

		/* In translated mode bit 7 marks a release; strip it from the code. */
		if (atkbd->emul || atkbd_need_xlate(atkbd->xl_bit, code)) {
			atkbd->release = code >> 7;
			code &= 0x7f;
		}

		/* The xl_bit bookkeeping works on the unmodified byte. */
		if (!atkbd->emul)
			atkbd_calculate_xl_bit(atkbd, data);
	}

	/* Protocol bytes are consumed here and never reach the keymap. */
	switch (code) {
	case ATKBD_RET_BAT:
		atkbd->enabled = false;
		serio_reconnect(atkbd->ps2dev.serio);
		return;
	case ATKBD_RET_EMUL0:
		atkbd->emul = 1;
		return;
	case ATKBD_RET_EMUL1:
		atkbd->emul = 2;
		return;
	case ATKBD_RET_RELEASE:
		atkbd->release = true;
		return;
	case ATKBD_RET_ACK:
	case ATKBD_RET_NAK:
		if (printk_ratelimit())
			dev_warn(&serio->dev,
				 "Spurious %s on %s. "
				 "Some program might be trying to access hardware directly.\n",
				 data == ATKBD_RET_ACK ? "ACK" : "NAK", serio->phys);
		return;
	case ATKBD_RET_ERR:
		atkbd->err_count++;
		dev_dbg(&serio->dev, "Keyboard on %s reports too many keys pressed.\n",
			serio->phys);
		return;
	}

	code = atkbd_compat_scancode(atkbd, code);

	/* Count down the prefix state; stop here while more bytes are expected. */
	if (atkbd->emul && --atkbd->emul)
		return;

	keycode = atkbd->keycode[code];

	/* No MSC_SCAN for a real release of a key whose release is forced. */
	if (!(atkbd->release && test_bit(code, atkbd->force_release_mask)))
		if (keycode != ATKBD_KEY_NULL)
			input_event(dev, EV_MSC, MSC_SCAN, code);

	switch (keycode) {
	case ATKBD_KEY_NULL:
		break;
	case ATKBD_KEY_UNKNOWN:
		dev_warn(&serio->dev,
			 "Unknown key %s (%s set %d, code %#x on %s).\n",
			 atkbd->release ? "released" : "pressed",
			 atkbd->translated ? "translated" : "raw",
			 atkbd->set, code, serio->phys);
		dev_warn(&serio->dev,
			 "Use 'setkeycodes %s%02x <keycode>' to make it known.\n",
			 code & 0x80 ? "e0" : "", code & 0x7f);
		input_sync(dev);
		break;
	case ATKBD_SCR_1:
		scroll = 1;
		break;
	case ATKBD_SCR_2:
		scroll = 2;
		break;
	case ATKBD_SCR_4:
		scroll = 4;
		break;
	case ATKBD_SCR_8:
		scroll = 8;
		break;
	case ATKBD_SCR_CLICK:
		click = !atkbd->release;
		break;
	case ATKBD_SCR_LEFT:
		hscroll = -1;
		break;
	case ATKBD_SCR_RIGHT:
		hscroll = 1;
		break;
	default:
		/* value: 0 = release, 1 = press, 2 = repeat */
		if (atkbd->release) {
			value = 0;
			atkbd->last = 0;
		} else if (!atkbd->softrepeat && test_bit(keycode, dev->key)) {
			/* Workaround Toshiba laptop multiple keypress */
			value = time_before(jiffies, atkbd->time) && atkbd->last == code ? 1 : 2;
		} else {
			value = 1;
			atkbd->last = code;
			atkbd->time = jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]) / 2;
		}

		input_event(dev, EV_KEY, keycode, value);
		input_sync(dev);

		/* Synthesize the release for keys flagged in force_release_mask. */
		if (value && test_bit(code, atkbd->force_release_mask)) {
			input_event(dev, EV_MSC, MSC_SCAN, code);
			input_report_key(dev, keycode, 0);
			input_sync(dev);
		}
	}

	if (atkbd->scroll) {
		if (click != -1)
			input_report_key(dev, BTN_MIDDLE, click);
		input_report_rel(dev, REL_WHEEL,
				 atkbd->release ? -scroll : scroll);
		input_report_rel(dev, REL_HWHEEL, hscroll);
		input_sync(dev);
	}

	atkbd->release = false;
}

/*
 * Rounds the requested repeat period and delay up to the nearest table
 * entry (capped at the last one), stores the rounded values back into
 * dev->rep[] and sends them with ATKBD_CMD_SETREP. Returns the result
 * of ps2_command().
 */
static int atkbd_set_repeat_rate(struct atkbd *atkbd)
{
	const short period[32] =
		{ 33,  37,  42,  46,  50,  54,  58,  63,  67,  75,  83,  92, 100, 109, 116, 125,
		 133, 149, 167, 182, 200, 217, 232, 250, 270, 303, 333, 370, 400, 435, 470, 500 };
	const short delay[4] =
		{ 250, 500, 750, 1000 };

	struct input_dev *dev = atkbd->dev;
	unsigned char param;
	int i = 0, j = 0;

	while (i < ARRAY_SIZE(period) - 1 && period[i] < dev->rep[REP_PERIOD])
		i++;
	dev->rep[REP_PERIOD] = period[i];

	while (j < ARRAY_SIZE(delay) - 1 && delay[j] < dev->rep[REP_DELAY])
		j++;
	dev->rep[REP_DELAY] = delay[j];

	/* Period index in the low five bits, delay index above them. */
	param = i | (j << 5);
	return ps2_command(&atkbd->ps2dev, &param, ATKBD_CMD_SETREP);
}

/*
 * Sends the LED state held in dev->led to the keyboard, followed by the
 * extended LED set when atkbd->extra is on. Returns 0 on success and -1
 * if either command fails.
 */
static int atkbd_set_leds(struct atkbd *atkbd)
{
	struct input_dev *dev = atkbd->dev;
	unsigned char param[2];

	param[0] = (test_bit(LED_SCROLLL, dev->led) ? 1 : 0)
		 | (test_bit(LED_NUML,    dev->led) ? 2 : 0)
		 | (test_bit(LED_CAPSL,   dev->led) ? 4 : 0);
	if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_SETLEDS))
		return -1;

	if (atkbd->extra) {
		param[0] = 0;
		param[1] = (test_bit(LED_COMPOSE, dev->led) ? 0x01 : 0)
			 | (test_bit(LED_SLEEP,   dev->led) ? 0x02 : 0)
			 | (test_bit(LED_SUSPEND, dev->led) ? 0x04 : 0)
			 | (test_bit(LED_MISC,    dev->led) ? 0x10 : 0)
			 | (test_bit(LED_MUTE,    dev->led) ? 0x20 : 0);
		if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_EX_SETLEDS))
			return -1;
	}

	return 0;
}

/*
 * atkbd_event_work() is used to complete processing of events that
 * can not be processed by input_event() which is often called from
 * interrupt context.
 */
static void atkbd_event_work(struct work_struct *work)
{
	struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work);

	/* Held until the function returns. */
	guard(mutex)(&atkbd->mutex);

	if (!atkbd->enabled) {
		/*
		 * Serio ports are resumed asynchronously so while driver core
		 * thinks that device is already fully operational in reality
		 * it may not be ready yet. In this case we need to keep
		 * rescheduling till reconnect completes.
		 */
		schedule_delayed_work(&atkbd->event_work,
					msecs_to_jiffies(100));
	} else {
		if (test_and_clear_bit(ATKBD_LED_EVENT_BIT, &atkbd->event_mask))
			atkbd_set_leds(atkbd);

		if (test_and_clear_bit(ATKBD_REP_EVENT_BIT, &atkbd->event_mask))
			atkbd_set_repeat_rate(atkbd);
	}
}

/*
 * Schedule switch for execution. We need to throttle requests,
 * otherwise keyboard may become unresponsive.
 */
static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit)
{
	unsigned long delay = msecs_to_jiffies(50);

	/* Run immediately if the previous request is more than 50 ms old. */
	if (time_after(jiffies, atkbd->event_jiffies + delay))
		delay = 0;

	atkbd->event_jiffies = jiffies;
	set_bit(event_bit, &atkbd->event_mask);
	mb();
	schedule_delayed_work(&atkbd->event_work, delay);
}

/*
 * Event callback from the input module. Events that change the state of
 * the hardware are processed here. If action can not be performed in
 * interrupt context it is offloaded to atkbd_event_work.
*/ static int atkbd_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct atkbd *atkbd = input_get_drvdata(dev); if (!atkbd->write) return -1; switch (type) { case EV_LED: atkbd_schedule_event_work(atkbd, ATKBD_LED_EVENT_BIT); return 0; case EV_REP: if (!atkbd->softrepeat) atkbd_schedule_event_work(atkbd, ATKBD_REP_EVENT_BIT); return 0; default: return -1; } } /* * atkbd_enable() signals that interrupt handler is allowed to * generate input events. */ static inline void atkbd_enable(struct atkbd *atkbd) { guard(serio_pause_rx)(atkbd->ps2dev.serio); atkbd->enabled = true; } /* * atkbd_disable() tells input handler that all incoming data except * for ACKs and command response should be dropped. */ static inline void atkbd_disable(struct atkbd *atkbd) { guard(serio_pause_rx)(atkbd->ps2dev.serio); atkbd->enabled = false; } static int atkbd_activate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; /* * Enable the keyboard to receive keystrokes. */ if (ps2_command(ps2dev, NULL, ATKBD_CMD_ENABLE)) { dev_err(&ps2dev->serio->dev, "Failed to enable keyboard on %s\n", ps2dev->serio->phys); return -1; } return 0; } /* * atkbd_deactivate() resets and disables the keyboard from sending * keystrokes. */ static void atkbd_deactivate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_DIS)) dev_err(&ps2dev->serio->dev, "Failed to deactivate keyboard on %s\n", ps2dev->serio->phys); } #ifdef CONFIG_X86 static bool atkbd_is_portable_device(void) { static const char * const chassis_types[] = { "8", /* Portable */ "9", /* Laptop */ "10", /* Notebook */ "14", /* Sub-Notebook */ "31", /* Convertible */ "32", /* Detachable */ }; int i; for (i = 0; i < ARRAY_SIZE(chassis_types); i++) if (dmi_match(DMI_CHASSIS_TYPE, chassis_types[i])) return true; return false; } /* * On many modern laptops ATKBD_CMD_GETID may cause problems, on these laptops * the controller is always in translated mode. 
In this mode mice/touchpads will
 * not work. So in this case simply assume a keyboard is connected to avoid
 * confusing some laptop keyboards.
 *
 * Skipping ATKBD_CMD_GETID ends up using a fake keyboard id. Using the standard
 * 0xab83 id is ok in translated mode, only atkbd_select_set() checks atkbd->id
 * and in translated mode that is a no-op.
 */
static bool atkbd_skip_getid(struct atkbd *atkbd)
{
	return atkbd->translated && atkbd_is_portable_device();
}
#else
/* Without CONFIG_X86 the ID query is never skipped. */
static inline bool atkbd_skip_getid(struct atkbd *atkbd) { return false; }
#endif

/*
 * atkbd_probe() probes for an AT keyboard on a serio port.
 *
 * Returns 0 and fills in atkbd->id when a keyboard is found, -1
 * otherwise.
 */
static int atkbd_probe(struct atkbd *atkbd)
{
	struct ps2dev *ps2dev = &atkbd->ps2dev;
	unsigned char param[2];

/*
 * Some systems, where the bit-twiddling when testing the io-lines of the
 * controller may confuse the keyboard need a full reset of the keyboard. On
 * these systems the BIOS also usually doesn't do it for us.
 */

	/* A failed reset is only reported; probing continues. */
	if (atkbd_reset)
		if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_BAT))
			dev_warn(&ps2dev->serio->dev,
				 "keyboard reset failed on %s\n",
				 ps2dev->serio->phys);

	if (atkbd_skip_getid(atkbd)) {
		atkbd->id = 0xab83;
		goto deactivate_kbd;
	}

/*
 * Then we check the keyboard ID. We should get 0xab83 under normal conditions.
 * Some keyboards report different values, but the first byte is always 0xab or
 * 0xac. Some old AT keyboards don't report anything. If a mouse is connected, this
 * should make sure we don't try to set the LEDs on it.
 */

	param[0] = param[1] = 0xa5;	/* initialize with invalid values */
	if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) {

/*
 * If the get ID command failed, we check if we can at least set
 * the LEDs on the keyboard. This should work on every keyboard out there.
 * It also turns the LEDs off, which we want anyway.
 */
		param[0] = 0;
		if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
			return -1;
		/* Placeholder ID for keyboards that do not answer the ID query. */
		atkbd->id = 0xabba;
		return 0;
	}

	if (!ps2_is_keyboard_id(param[0]))
		return -1;

	atkbd->id = (param[0] << 8) | param[1];

	if (atkbd->id == 0xaca1 && atkbd->translated) {
		dev_err(&ps2dev->serio->dev,
			"NCD terminal keyboards are only supported on non-translating controllers. "
			"Use i8042.direct=1 to disable translation.\n");
		return -1;
	}

deactivate_kbd:
/*
 * Make sure nothing is coming from the keyboard and disturbs our
 * internal state.
 */
	if (!atkbd_skip_deactivate)
		atkbd_deactivate(atkbd);

	return 0;
}

/*
 * atkbd_select_set checks if a keyboard has a working Set 3 support, and
 * sets it into that. Unfortunately there are keyboards that can be switched
 * to Set 3, but don't work well in that (BTC Multimedia ...)
 *
 * Returns the scancode set the driver should assume (2 or 3) and
 * updates atkbd->extra as a side effect.
 */
static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra)
{
	struct ps2dev *ps2dev = &atkbd->ps2dev;
	unsigned char param[2];

	atkbd->extra = false;
/*
 * For known special keyboards we can go ahead and set the correct set.
 * We check for NCD PS/2 Sun, NorthGate OmniKey 101 and
 * IBM RapidAccess / IBM EzButton / Chicony KBP-8993 keyboards.
 */

	/* Nothing is sent to the keyboard on translated ports. */
	if (atkbd->translated)
		return 2;

	if (atkbd->id == 0xaca1) {
		param[0] = 3;
		ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET);
		return 3;
	}

	if (allow_extra) {
		param[0] = 0x71;
		if (!ps2_command(ps2dev, param, ATKBD_CMD_EX_ENABLE)) {
			atkbd->extra = true;
			return 2;
		}
	}

	if (atkbd_terminal) {
		ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MB);
		return 3;
	}

	if (target_set != 3)
		return 2;

	/* A keyboard that answers this query stays on set 2 and gets its ID replaced. */
	if (!ps2_command(ps2dev, param, ATKBD_CMD_OK_GETID)) {
		atkbd->id = param[0] << 8 | param[1];
		return 2;
	}

	param[0] = 3;
	if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET))
		return 2;

	/* Read the active set back to verify that the switch took effect. */
	param[0] = 0;
	if (ps2_command(ps2dev, param, ATKBD_CMD_GSCANSET))
		return 2;

	/*
	 * NOTE(review): when the keyboard reports a set other than 3 and
	 * the switch back to set 2 succeeds, execution still falls through
	 * and returns 3 - confirm that this is intended.
	 */
	if (param[0] != 3) {
		param[0] = 2;
		if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET))
			return 2;
	}

	ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MBR);

	return 3;
}

/*
 * Puts LEDs and autorepeat into a known state. Returns 0 on success
 * and -1 if either command fails.
 */
static int atkbd_reset_state(struct atkbd *atkbd)
{
        struct ps2dev *ps2dev = &atkbd->ps2dev;
	unsigned char param[1];

/*
 * Set the LEDs to a predefined state (all off).
 */

	param[0] = 0;
	if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS))
		return -1;

/*
 * Set autorepeat to fastest possible.
 */

	param[0] = 0;
	if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP))
		return -1;

	return 0;
}

/*
 * atkbd_cleanup() restores the keyboard state so that BIOS is happy after a
 * reboot.
 */
static void atkbd_cleanup(struct serio *serio)
{
	struct atkbd *atkbd = atkbd_from_serio(serio);

	atkbd_disable(atkbd);
	ps2_command(&atkbd->ps2dev, NULL, ATKBD_CMD_RESET_DEF);
}


/*
 * atkbd_disconnect() closes and frees.
 */
static void atkbd_disconnect(struct serio *serio)
{
	struct atkbd *atkbd = atkbd_from_serio(serio);

	atkbd_disable(atkbd);

	input_unregister_device(atkbd->dev);

	/*
	 * Make sure we don't have a command in flight.
	 * Note that since atkbd->enabled is false event work will keep
	 * rescheduling itself until it gets canceled and will not try
	 * accessing freed input device or serio port.
	 */
	cancel_delayed_work_sync(&atkbd->event_work);

	serio_close(serio);
	serio_set_drvdata(serio, NULL);
	kfree(atkbd);
}

/*
 * generate release events for the keycodes given in data
 *
 * data points to an array of scancodes terminated by -1U. The list is
 * applied only while the keyboard uses set 2.
 */
static void atkbd_apply_forced_release_keylist(struct atkbd* atkbd,
						const void *data)
{
	const unsigned int *keys = data;
	unsigned int i;

	if (atkbd->set == 2)
		for (i = 0; keys[i] != -1U; i++)
			__set_bit(keys[i], atkbd->force_release_mask);
}

/*
 * Most special keys (Fn+F?) on Dell laptops do not generate release
 * events so we have to do it ourselves.
 */
static unsigned int atkbd_dell_laptop_forced_release_keys[] = {
	0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8f, 0x93, -1U
};

/*
 * Perform fixup for HP system that doesn't generate release
 * for its video switch
 */
static unsigned int atkbd_hp_forced_release_keys[] = {
	0x94, -1U
};

/*
 * Samsung NC10,NC20 with Fn+F? key release not working
 */
static unsigned int atkbd_samsung_forced_release_keys[] = {
	0x82, 0x83, 0x84, 0x86, 0x88, 0x89, 0xb3, 0xf7, 0xf9, -1U
};

/*
 * Amilo Pi 3525 key release for Fn+Volume keys not working
 */
static unsigned int atkbd_amilo_pi3525_forced_release_keys[] = {
	0x20, 0xa0, 0x2e, 0xae, 0x30, 0xb0, -1U
};

/*
 * Amilo Xi 3650 key release for light touch bar not working
 */
static unsigned int atkbd_amilo_xi3650_forced_release_keys[] = {
	0x67, 0xed, 0x90, 0xa2, 0x99, 0xa4, 0xae, 0xb0, -1U
};

/*
 * Soltech TA12 system with broken key release on volume keys and mute key
 */
static unsigned int atkdb_soltech_ta12_forced_release_keys[] = {
	0xa0, 0xae, 0xb0, -1U
};

/*
 * Many notebooks don't send key release event for volume up/down
 * keys, with key list below common among them
 */
static unsigned int atkbd_volume_forced_release_keys[] = {
	0xae, 0xb0, -1U
};

/*
 * OQO 01+ multimedia keys (64--66) generate e0 6x upon release whereas
 * they should be generating e4-e6 (0x80 | code).
*/ static unsigned int atkbd_oqo_01plus_scancode_fixup(struct atkbd *atkbd, unsigned int code) { if (atkbd->translated && atkbd->emul == 1 && (code == 0x64 || code == 0x65 || code == 0x66)) { atkbd->emul = 0; code |= 0x80; } return code; } static int atkbd_get_keymap_from_fwnode(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; int i, n; u32 *ptr; u16 scancode, keycode; /* Parse "linux,keymap" property */ n = device_property_count_u32(dev, "linux,keymap"); if (n <= 0 || n > ATKBD_KEYMAP_SIZE) return -ENXIO; ptr = kcalloc(n, sizeof(u32), GFP_KERNEL); if (!ptr) return -ENOMEM; if (device_property_read_u32_array(dev, "linux,keymap", ptr, n)) { dev_err(dev, "problem parsing FW keymap property\n"); kfree(ptr); return -EINVAL; } memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); for (i = 0; i < n; i++) { scancode = SCANCODE(ptr[i]); keycode = KEYCODE(ptr[i]); atkbd->keycode[scancode] = keycode; } kfree(ptr); return 0; } /* * atkbd_set_keycode_table() initializes keyboard's keycode table * according to the selected scancode set */ static void atkbd_set_keycode_table(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; unsigned int scancode; int i, j; memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); bitmap_zero(atkbd->force_release_mask, ATKBD_KEYMAP_SIZE); if (!atkbd_get_keymap_from_fwnode(atkbd)) { dev_dbg(dev, "Using FW keymap\n"); } else if (atkbd->translated) { for (i = 0; i < 128; i++) { scancode = atkbd_unxlate_table[i]; atkbd->keycode[i] = atkbd_set2_keycode[scancode]; atkbd->keycode[i | 0x80] = atkbd_set2_keycode[scancode | 0x80]; if (atkbd->scroll) for (j = 0; j < ARRAY_SIZE(atkbd_scroll_keys); j++) if ((scancode | 0x80) == atkbd_scroll_keys[j].set2) atkbd->keycode[i | 0x80] = atkbd_scroll_keys[j].keycode; } } else if (atkbd->set == 3) { memcpy(atkbd->keycode, atkbd_set3_keycode, sizeof(atkbd->keycode)); } else { memcpy(atkbd->keycode, atkbd_set2_keycode, sizeof(atkbd->keycode)); if (atkbd->scroll) for (i = 0; i < 
ARRAY_SIZE(atkbd_scroll_keys); i++) { scancode = atkbd_scroll_keys[i].set2; atkbd->keycode[scancode] = atkbd_scroll_keys[i].keycode; } } /* * HANGEUL and HANJA keys do not send release events so we need to * generate such events ourselves */ scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANGEUL); atkbd->keycode[scancode] = KEY_HANGEUL; __set_bit(scancode, atkbd->force_release_mask); scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANJA); atkbd->keycode[scancode] = KEY_HANJA; __set_bit(scancode, atkbd->force_release_mask); /* * Perform additional fixups */ if (atkbd_platform_fixup) atkbd_platform_fixup(atkbd, atkbd_platform_fixup_data); } /* * atkbd_set_device_attrs() sets up keyboard's input device structure */ static void atkbd_set_device_attrs(struct atkbd *atkbd) { struct input_dev *input_dev = atkbd->dev; int i; if (atkbd->extra) snprintf(atkbd->name, sizeof(atkbd->name), "AT Set 2 Extra keyboard"); else snprintf(atkbd->name, sizeof(atkbd->name), "AT %s Set %d keyboard", atkbd->translated ? "Translated" : "Raw", atkbd->set); scnprintf(atkbd->phys, sizeof(atkbd->phys), "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; input_dev->id.bustype = BUS_I8042; input_dev->id.vendor = 0x0001; input_dev->id.product = atkbd->translated ? 1 : atkbd->set; input_dev->id.version = atkbd->id; input_dev->event = atkbd_event; input_dev->dev.parent = &atkbd->ps2dev.serio->dev; input_set_drvdata(input_dev, atkbd); input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_MSC); if (atkbd->write) { input_dev->evbit[0] |= BIT_MASK(EV_LED); input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL); } if (atkbd->extra) input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) | BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) | BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC); if (!atkbd->softrepeat) { input_dev->rep[REP_DELAY] = 250; input_dev->rep[REP_PERIOD] = 33; } input_dev->mscbit[0] = atkbd->softraw ? 
BIT_MASK(MSC_SCAN) : BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN); if (atkbd->scroll) { input_dev->evbit[0] |= BIT_MASK(EV_REL); input_dev->relbit[0] = BIT_MASK(REL_WHEEL) | BIT_MASK(REL_HWHEEL); __set_bit(BTN_MIDDLE, input_dev->keybit); } input_dev->keycode = atkbd->keycode; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(atkbd_set2_keycode); for (i = 0; i < ATKBD_KEYMAP_SIZE; i++) { if (atkbd->keycode[i] != KEY_RESERVED && atkbd->keycode[i] != ATKBD_KEY_NULL && atkbd->keycode[i] < ATKBD_SPECIAL) { __set_bit(atkbd->keycode[i], input_dev->keybit); } } } static void atkbd_parse_fwnode_data(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); struct device *dev = &serio->dev; int n; /* Parse "function-row-physmap" property */ n = device_property_count_u32(dev, "function-row-physmap"); if (n > 0 && n <= VIVALDI_MAX_FUNCTION_ROW_KEYS && !device_property_read_u32_array(dev, "function-row-physmap", atkbd->vdata.function_row_physmap, n)) { atkbd->vdata.num_function_row_keys = n; dev_dbg(dev, "FW reported %d function-row key locations\n", n); } } /* * atkbd_connect() is called when the serio module finds an interface * that isn't handled yet by an appropriate device driver. We check if * there is an AT keyboard out there and if yes, we register ourselves * to the input module. 
*/
static int atkbd_connect(struct serio *serio, struct serio_driver *drv)
{
	struct atkbd *atkbd;
	struct input_dev *dev;
	int err = -ENOMEM;	/* reported if either allocation below fails */

	/*
	 * Both allocations are attempted before either is checked; the
	 * fail1 path hands both pointers to their release functions.
	 */
	atkbd = kzalloc(sizeof(*atkbd), GFP_KERNEL);
	dev = input_allocate_device();
	if (!atkbd || !dev)
		goto fail1;

	atkbd->dev = dev;
	ps2_init(&atkbd->ps2dev, serio,
		 atkbd_pre_receive_byte, atkbd_receive_byte);
	INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work);
	mutex_init(&atkbd->mutex);

	/*
	 * SERIO_8042_XL ports are flagged as translated and then share the
	 * SERIO_8042 handling: writing is enabled only when the port has a
	 * write callback. Other port types leave both flags clear.
	 */
	switch (serio->id.type) {

	case SERIO_8042_XL:
		atkbd->translated = true;
		fallthrough;

	case SERIO_8042:
		if (serio->write)
			atkbd->write = true;
		break;
	}

	/* Start from the module-wide settings */
	atkbd->softraw = atkbd_softraw;
	atkbd->softrepeat = atkbd_softrepeat;
	atkbd->scroll = atkbd_scroll;

	/* Software repeat forces softraw on */
	if (atkbd->softrepeat)
		atkbd->softraw = true;

	serio_set_drvdata(serio, atkbd);

	err = serio_open(serio, drv);
	if (err)
		goto fail2;

	if (atkbd->write) {

		/* Any probe failure is reported as -ENODEV */
		if (atkbd_probe(atkbd)) {
			err = -ENODEV;
			goto fail3;
		}

		atkbd->set = atkbd_select_set(atkbd, atkbd_set, atkbd_extra);
		atkbd_reset_state(atkbd);

	} else {
		/* Cannot talk to the keyboard: use fixed set and id values */
		atkbd->set = 2;
		atkbd->id = 0xab00;
	}

	atkbd_parse_fwnode_data(serio);

	atkbd_set_keycode_table(atkbd);
	atkbd_set_device_attrs(atkbd);

	atkbd_enable(atkbd);
	if (serio->write)
		atkbd_activate(atkbd);

	err = input_register_device(atkbd->dev);
	if (err)
		goto fail3;

	return 0;

	/* Error unwinding: each label undoes one more step, newest first */
 fail3:	serio_close(serio);
 fail2:	serio_set_drvdata(serio, NULL);
 fail1:	input_free_device(dev);
	kfree(atkbd);
	return err;
}

/*
 * atkbd_reconnect() tries to restore keyboard into a sane state and is
 * most likely called on resume.
 */

static int atkbd_reconnect(struct serio *serio)
{
	struct atkbd *atkbd = atkbd_from_serio(serio);
	struct serio_driver *drv = serio->drv;
	int error;

	/* Nothing to restore when no driver data or driver is attached */
	if (!atkbd || !drv) {
		dev_dbg(&serio->dev,
			"reconnect request, but serio is disconnected, ignoring...\n");
		return -1;
	}

	guard(mutex)(&atkbd->mutex);

	atkbd_disable(atkbd);

	if (atkbd->write) {
		error = atkbd_probe(atkbd);
		if (error)
			return error;

		/* The keyboard must accept the same set it used before */
		if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra))
			return -EIO;

		/*
		 * Restore LED state and repeat rate.
While input core
		 * will do this for us at resume time reconnect may happen
		 * because user requested it via sysfs or simply because
		 * keyboard was unplugged and plugged in again so we need
		 * to do it ourselves here.
		 */
		atkbd_set_leds(atkbd);
		if (!atkbd->softrepeat)
			atkbd_set_repeat_rate(atkbd);

	}

	/*
	 * Reset our state machine in case reconnect happened in the middle
	 * of multi-byte scancode.
	 */
	atkbd->xl_bit = 0;
	atkbd->emul = 0;

	atkbd_enable(atkbd);
	if (atkbd->write)
		atkbd_activate(atkbd);

	return 0;
}

/*
 * Serio ports this driver binds to: SERIO_8042 and SERIO_8042_XL ports with
 * any protocol, and SERIO_RS232 ports using the SERIO_PS2SER protocol. The
 * table ends with a zeroed entry.
 */
static const struct serio_device_id atkbd_serio_ids[] = {
	{
		.type	= SERIO_8042,
		.proto	= SERIO_ANY,
		.id	= SERIO_ANY,
		.extra	= SERIO_ANY,
	},
	{
		.type	= SERIO_8042_XL,
		.proto	= SERIO_ANY,
		.id	= SERIO_ANY,
		.extra	= SERIO_ANY,
	},
	{
		.type	= SERIO_RS232,
		.proto	= SERIO_PS2SER,
		.id	= SERIO_ANY,
		.extra	= SERIO_ANY,
	},
	{ 0 }
};

MODULE_DEVICE_TABLE(serio, atkbd_serio_ids);

/*
 * Serio driver description: ties the id table above to the connect,
 * reconnect, disconnect and cleanup callbacks, routes interrupts to
 * ps2_interrupt and attaches the sysfs attribute groups.
 */
static struct serio_driver atkbd_drv = {
	.driver		= {
		.name		= "atkbd",
		.dev_groups	= atkbd_attribute_groups,
	},
	.description	= DRIVER_DESC,
	.id_table	= atkbd_serio_ids,
	.interrupt	= ps2_interrupt,
	.connect	= atkbd_connect,
	.reconnect	= atkbd_reconnect,
	.disconnect	= atkbd_disconnect,
	.cleanup	= atkbd_cleanup,
};

/*
 * atkbd_attr_show_helper() resolves the atkbd instance behind a sysfs
 * device and forwards the output buffer to @handler, returning whatever
 * the handler returns.
 */
static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf,
				ssize_t (*handler)(struct atkbd *, char *))
{
	struct serio *serio = to_serio_port(dev);
	struct atkbd *atkbd = atkbd_from_serio(serio);

	return handler(atkbd, buf);
}

/*
 * atkbd_attr_set_helper() runs a sysfs store @handler with atkbd->mutex
 * held and the keyboard disabled for the duration of the call; the keyboard
 * is re-enabled before the handler's result is returned. -EINTR is returned
 * when the guarded section is not entered.
 */
static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count,
				ssize_t (*handler)(struct atkbd *, const char *, size_t))
{
	struct serio *serio = to_serio_port(dev);
	struct atkbd *atkbd = atkbd_from_serio(serio);
	int retval;

	scoped_guard(mutex_intr, &atkbd->mutex) {
		atkbd_disable(atkbd);
		retval = handler(atkbd, buf, count);
		atkbd_enable(atkbd);

		return retval;
	}

	/* Reached only if the interruptible lock attempt did not succeed */
	return -EINTR;
}

/* Show handler for the "extra" flag: prints 1 or 0 followed by a newline */
static ssize_t atkbd_show_extra(struct atkbd *atkbd, char *buf)
{
	return sprintf(buf, "%d\n", atkbd->extra ?
1 : 0); } static ssize_t atkbd_set_extra(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_extra; unsigned char old_set; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->extra != value) { /* * Since device's properties will change we need to * unregister old device. But allocate and register * new one first to make sure we have it. */ old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, atkbd->set, value); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_force_release(struct atkbd *atkbd, char *buf) { size_t len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", ATKBD_KEYMAP_SIZE, atkbd->force_release_mask); buf[len++] = '\n'; buf[len] = '\0'; return len; } static ssize_t atkbd_set_force_release(struct atkbd *atkbd, const char *buf, size_t count) { /* 64 bytes on stack should be acceptable */ DECLARE_BITMAP(new_mask, ATKBD_KEYMAP_SIZE); int err; err = bitmap_parselist(buf, new_mask, ATKBD_KEYMAP_SIZE); if (err) return err; memcpy(atkbd->force_release_mask, new_mask, sizeof(atkbd->force_release_mask)); return count; } static ssize_t atkbd_show_scroll(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->scroll ? 
1 : 0); } static ssize_t atkbd_set_scroll(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_scroll; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->scroll != value) { old_dev = atkbd->dev; old_scroll = atkbd->scroll; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->scroll = value; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->scroll = old_scroll; atkbd->dev = old_dev; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_set(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->set); } static ssize_t atkbd_set_set(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; unsigned char old_set; bool old_extra; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value != 2 && value != 3) return -EINVAL; if (atkbd->set != value) { old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, value, atkbd->extra); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softrepeat(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softrepeat ? 
1 : 0); } static ssize_t atkbd_set_softrepeat(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softrepeat, old_softraw; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softrepeat != value) { old_dev = atkbd->dev; old_softrepeat = atkbd->softrepeat; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softrepeat = value; if (atkbd->softrepeat) atkbd->softraw = true; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softrepeat = old_softrepeat; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softraw(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softraw ? 
1 : 0); } static ssize_t atkbd_set_softraw(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softraw; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softraw != value) { old_dev = atkbd->dev; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softraw = value; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_err_count(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%lu\n", atkbd->err_count); } static int __init atkbd_setup_forced_release(const struct dmi_system_id *id) { atkbd_platform_fixup = atkbd_apply_forced_release_keylist; atkbd_platform_fixup_data = id->driver_data; return 1; } static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) { atkbd_platform_scancode_fixup = id->driver_data; return 1; } static int __init atkbd_deactivate_fixup(const struct dmi_system_id *id) { atkbd_skip_deactivate = true; return 1; } /* * NOTE: do not add any more "force release" quirks to this table. The * task of adjusting list of keys that should be "released" automatically * by the driver is now delegated to userspace tools, such as udev, so * submit such quirks there. 
*/
/*
 * Each entry pairs a set of DMI match strings with a callback and, for most
 * entries, a driver_data pointer the callback stores. The list ends with an
 * empty entry.
 */
static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = {
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_dell_laptop_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
			DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_dell_laptop_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "HP 2133"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_hp_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion ZV6100"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		/* Inventec Symphony */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "INVENTEC"),
			DMI_MATCH(DMI_PRODUCT_NAME, "SYMPHONY 6.0/7.0"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		/* Samsung NC10 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
			DMI_MATCH(DMI_PRODUCT_NAME, "NC10"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_samsung_forced_release_keys,
	},
	{
		/* Samsung NC20 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
			DMI_MATCH(DMI_PRODUCT_NAME, "NC20"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_samsung_forced_release_keys,
	},
	{
		/* Samsung SQ45S70S */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
			DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_samsung_forced_release_keys,
	},
	{
		/* Fujitsu Amilo PA 1510 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pa 1510"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_volume_forced_release_keys,
	},
	{
		/* Fujitsu Amilo Pi 3525 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pi 3525"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_amilo_pi3525_forced_release_keys,
	},
	{
		/* Fujitsu Amilo Xi 3650 */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
			DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xi 3650"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkbd_amilo_xi3650_forced_release_keys,
	},
	{
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Soltech Corporation"),
			DMI_MATCH(DMI_PRODUCT_NAME, "TA12"),
		},
		.callback = atkbd_setup_forced_release,
		.driver_data = atkdb_soltech_ta12_forced_release_keys,
	},
	{
		/* OQO Model 01+ */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "OQO"),
			DMI_MATCH(DMI_PRODUCT_NAME, "ZEPTO"),
		},
		.callback = atkbd_setup_scancode_fixup,
		.driver_data = atkbd_oqo_01plus_scancode_fixup,
	},
	{
		/* Matched on vendor alone; this entry carries no driver_data */
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"),
		},
		.callback = atkbd_deactivate_fixup,
	},
	{ }
};

/*
 * Module entry point: runs the DMI quirk table against the system, then
 * registers the serio driver and returns the registration result.
 */
static int __init atkbd_init(void)
{
	dmi_check_system(atkbd_dmi_quirk_table);

	return serio_register_driver(&atkbd_drv);
}

/* Module exit point: unregisters the serio driver. */
static void __exit atkbd_exit(void)
{
	serio_unregister_driver(&atkbd_drv);
}

module_init(atkbd_init);
module_exit(atkbd_exit);
5 5 7 4 5 6 6 6 6 6 6 6 6 1056 1056 1054 25 16 16 16 16 3 15 14 9 9 9 5 5 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool_netlink.h> #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/rtnetlink.h> #include <linux/ptp_clock_kernel.h> #include <linux/phy_link_topology.h> #include <net/netdev_queues.h> #include "netlink.h" #include "common.h" #include "../core/dev.h" const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_SG_BIT] = "tx-scatter-gather", [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", [NETIF_F_HIGHDMA_BIT] = "highdma", [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", [NETIF_F_GRO_BIT] = "rx-gro", [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_GSO_ACCECN_BIT] = "tx-tcp-accecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 
[NETIF_F_GSO_GRE_CSUM_BIT] =	 "tx-gre-csum-segmentation",
	[NETIF_F_GSO_IPXIP4_BIT] =	 "tx-ipxip4-segmentation",
	[NETIF_F_GSO_IPXIP6_BIT] =	 "tx-ipxip6-segmentation",
	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",
	[NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
	[NETIF_F_GSO_PARTIAL_BIT] =	 "tx-gso-partial",
	[NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation",
	[NETIF_F_GSO_SCTP_BIT] =	 "tx-sctp-segmentation",
	[NETIF_F_GSO_ESP_BIT] =		 "tx-esp-segmentation",
	[NETIF_F_GSO_UDP_L4_BIT] =	 "tx-udp-segmentation",
	[NETIF_F_GSO_FRAGLIST_BIT] =	 "tx-gso-list",

	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",
	[NETIF_F_SCTP_CRC_BIT] =        "tx-checksum-sctp",
	[NETIF_F_NTUPLE_BIT] =           "rx-ntuple-filter",
	[NETIF_F_RXHASH_BIT] =           "rx-hashing",
	[NETIF_F_RXCSUM_BIT] =           "rx-checksum",
	[NETIF_F_NOCACHE_COPY_BIT] =     "tx-nocache-copy",
	[NETIF_F_LOOPBACK_BIT] =         "loopback",
	[NETIF_F_RXFCS_BIT] =            "rx-fcs",
	[NETIF_F_RXALL_BIT] =            "rx-all",
	[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
	[NETIF_F_HW_TC_BIT] =		 "hw-tc-offload",
	[NETIF_F_HW_ESP_BIT] =		 "esp-hw-offload",
	[NETIF_F_HW_ESP_TX_CSUM_BIT] =	 "esp-tx-csum-hw-offload",
	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
	[NETIF_F_HW_TLS_TX_BIT] =	 "tls-hw-tx-offload",
	[NETIF_F_HW_TLS_RX_BIT] =	 "tls-hw-rx-offload",
	[NETIF_F_GRO_FRAGLIST_BIT] =	 "rx-gro-list",
	[NETIF_F_HW_MACSEC_BIT] =	 "macsec-hw-offload",
	[NETIF_F_GRO_UDP_FWD_BIT] =	 "rx-udp-gro-forwarding",
	[NETIF_F_HW_HSR_TAG_INS_BIT] =	 "hsr-tag-ins-offload",
	[NETIF_F_HW_HSR_TAG_RM_BIT] =	 "hsr-tag-rm-offload",
	[NETIF_F_HW_HSR_FWD_BIT] =	 "hsr-fwd-offload",
	[NETIF_F_HW_HSR_DUP_BIT] =	 "hsr-dup-offload",
};

/*
 * Names of the RSS hash functions, indexed by ETH_RSS_HASH_*_BIT. Each
 * name occupies a fixed ETH_GSTRING_LEN-byte slot.
 */
const char
rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
	[ETH_RSS_HASH_TOP_BIT] =	"toeplitz",
	[ETH_RSS_HASH_XOR_BIT] =	"xor",
	[ETH_RSS_HASH_CRC32_BIT] =	"crc32",
};

/*
 * Names of the device tunables, indexed by tunable id. Each name occupies
 * a fixed ETH_GSTRING_LEN-byte slot.
 */
const char
tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
	[ETHTOOL_ID_UNSPEC]     = "Unspec",
[ETHTOOL_RX_COPYBREAK]	= "rx-copybreak",
	[ETHTOOL_TX_COPYBREAK]	= "tx-copybreak",
	[ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
	[ETHTOOL_TX_COPYBREAK_BUF_SIZE] = "tx-copybreak-buf-size",
};

/*
 * Names of the PHY tunables, indexed by PHY tunable id. Each name occupies
 * a fixed ETH_GSTRING_LEN-byte slot.
 */
const char
phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
	[ETHTOOL_ID_UNSPEC]     = "Unspec",
	[ETHTOOL_PHY_DOWNSHIFT]	= "phy-downshift",
	[ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
	[ETHTOOL_PHY_EDPD]	= "phy-energy-detect-power-down",
};

/* Builds the string "<speed>base<type>/<duplex>" by stringifying the arguments */
#define __LINK_MODE_NAME(speed, type, duplex) \
	#speed "base" #type "/" #duplex
/* Designated initializer placing that string at the matching link mode index */
#define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \
	[ETHTOOL_LINK_MODE(speed, type, duplex)] = \
	__LINK_MODE_NAME(speed, type, duplex)
/* Same, for modes whose name does not follow the speed/type/duplex pattern */
#define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \
	[ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name

/*
 * Link mode names, one fixed ETH_GSTRING_LEN-byte slot per entry. The array
 * size is left to the initializer list.
 */
const char link_mode_names[][ETH_GSTRING_LEN] = {
	__DEFINE_LINK_MODE_NAME(10, T, Half),
	__DEFINE_LINK_MODE_NAME(10, T, Full),
	__DEFINE_LINK_MODE_NAME(100, T, Half),
	__DEFINE_LINK_MODE_NAME(100, T, Full),
	__DEFINE_LINK_MODE_NAME(1000, T, Half),
	__DEFINE_LINK_MODE_NAME(1000, T, Full),
	__DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"),
	__DEFINE_SPECIAL_MODE_NAME(TP, "TP"),
	__DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"),
	__DEFINE_SPECIAL_MODE_NAME(MII, "MII"),
	__DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"),
	__DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"),
	__DEFINE_LINK_MODE_NAME(10000, T, Full),
	__DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"),
	__DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"),
	__DEFINE_LINK_MODE_NAME(2500, X, Full),
	__DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"),
	__DEFINE_LINK_MODE_NAME(1000, KX, Full),
	__DEFINE_LINK_MODE_NAME(10000, KX4, Full),
	__DEFINE_LINK_MODE_NAME(10000, KR, Full),
	__DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"),
	__DEFINE_LINK_MODE_NAME(20000, MLD2, Full),
	__DEFINE_LINK_MODE_NAME(20000, KR2, Full),
	__DEFINE_LINK_MODE_NAME(40000, KR4, Full),
	__DEFINE_LINK_MODE_NAME(40000, CR4, Full),
	__DEFINE_LINK_MODE_NAME(40000, SR4, Full),
	__DEFINE_LINK_MODE_NAME(40000, LR4, Full),
__DEFINE_LINK_MODE_NAME(56000, KR4, Full), __DEFINE_LINK_MODE_NAME(56000, CR4, Full), __DEFINE_LINK_MODE_NAME(56000, SR4, Full), __DEFINE_LINK_MODE_NAME(56000, LR4, Full), __DEFINE_LINK_MODE_NAME(25000, CR, Full), __DEFINE_LINK_MODE_NAME(25000, KR, Full), __DEFINE_LINK_MODE_NAME(25000, SR, Full), __DEFINE_LINK_MODE_NAME(50000, CR2, Full), __DEFINE_LINK_MODE_NAME(50000, KR2, Full), __DEFINE_LINK_MODE_NAME(100000, KR4, Full), __DEFINE_LINK_MODE_NAME(100000, SR4, Full), __DEFINE_LINK_MODE_NAME(100000, CR4, Full), __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full), __DEFINE_LINK_MODE_NAME(50000, SR2, Full), __DEFINE_LINK_MODE_NAME(1000, X, Full), __DEFINE_LINK_MODE_NAME(10000, CR, Full), __DEFINE_LINK_MODE_NAME(10000, SR, Full), __DEFINE_LINK_MODE_NAME(10000, LR, Full), __DEFINE_LINK_MODE_NAME(10000, LRM, Full), __DEFINE_LINK_MODE_NAME(10000, ER, Full), __DEFINE_LINK_MODE_NAME(2500, T, Full), __DEFINE_LINK_MODE_NAME(5000, T, Full), __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"), __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"), __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"), __DEFINE_LINK_MODE_NAME(50000, KR, Full), __DEFINE_LINK_MODE_NAME(50000, SR, Full), __DEFINE_LINK_MODE_NAME(50000, CR, Full), __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full), __DEFINE_LINK_MODE_NAME(50000, DR, Full), __DEFINE_LINK_MODE_NAME(100000, KR2, Full), __DEFINE_LINK_MODE_NAME(100000, SR2, Full), __DEFINE_LINK_MODE_NAME(100000, CR2, Full), __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_NAME(100000, DR2, Full), __DEFINE_LINK_MODE_NAME(200000, KR4, Full), __DEFINE_LINK_MODE_NAME(200000, SR4, Full), __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_NAME(200000, DR4, Full), __DEFINE_LINK_MODE_NAME(200000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, T1, Full), __DEFINE_LINK_MODE_NAME(1000, T1, Full), __DEFINE_LINK_MODE_NAME(400000, KR8, Full), __DEFINE_LINK_MODE_NAME(400000, SR8, Full), __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_NAME(400000, 
DR8, Full), __DEFINE_LINK_MODE_NAME(400000, CR8, Full), __DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"), __DEFINE_LINK_MODE_NAME(100000, KR, Full), __DEFINE_LINK_MODE_NAME(100000, SR, Full), __DEFINE_LINK_MODE_NAME(100000, LR_ER_FR, Full), __DEFINE_LINK_MODE_NAME(100000, DR, Full), __DEFINE_LINK_MODE_NAME(100000, CR, Full), __DEFINE_LINK_MODE_NAME(200000, KR2, Full), __DEFINE_LINK_MODE_NAME(200000, SR2, Full), __DEFINE_LINK_MODE_NAME(200000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_NAME(200000, DR2, Full), __DEFINE_LINK_MODE_NAME(200000, CR2, Full), __DEFINE_LINK_MODE_NAME(400000, KR4, Full), __DEFINE_LINK_MODE_NAME(400000, SR4, Full), __DEFINE_LINK_MODE_NAME(400000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_NAME(400000, DR4, Full), __DEFINE_LINK_MODE_NAME(400000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), __DEFINE_LINK_MODE_NAME(10, T1L, Full), __DEFINE_LINK_MODE_NAME(800000, CR8, Full), __DEFINE_LINK_MODE_NAME(800000, KR8, Full), __DEFINE_LINK_MODE_NAME(800000, DR8, Full), __DEFINE_LINK_MODE_NAME(800000, DR8_2, Full), __DEFINE_LINK_MODE_NAME(800000, SR8, Full), __DEFINE_LINK_MODE_NAME(800000, VR8, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), __DEFINE_LINK_MODE_NAME(200000, CR, Full), __DEFINE_LINK_MODE_NAME(200000, KR, Full), __DEFINE_LINK_MODE_NAME(200000, DR, Full), __DEFINE_LINK_MODE_NAME(200000, DR_2, Full), __DEFINE_LINK_MODE_NAME(200000, SR, Full), __DEFINE_LINK_MODE_NAME(200000, VR, Full), __DEFINE_LINK_MODE_NAME(400000, CR2, Full), __DEFINE_LINK_MODE_NAME(400000, KR2, Full), __DEFINE_LINK_MODE_NAME(400000, DR2, Full), __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full), __DEFINE_LINK_MODE_NAME(400000, SR2, Full), __DEFINE_LINK_MODE_NAME(400000, VR2, Full), __DEFINE_LINK_MODE_NAME(800000, CR4, Full), __DEFINE_LINK_MODE_NAME(800000, KR4, Full), __DEFINE_LINK_MODE_NAME(800000, DR4, Full), 
__DEFINE_LINK_MODE_NAME(800000, DR4_2, Full),
	__DEFINE_LINK_MODE_NAME(800000, SR4, Full),
	__DEFINE_LINK_MODE_NAME(800000, VR4, Full),
};
/* Build-time check: the name table has exactly one entry per link mode bit */
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);

/*
 * Lane count for each link mode type suffix. __DEFINE_LINK_MODE_PARAMS()
 * selects one of these by pasting the type onto __LINK_MODE_LANES_.
 */
#define __LINK_MODE_LANES_CR		1
#define __LINK_MODE_LANES_CR2		2
#define __LINK_MODE_LANES_CR4		4
#define __LINK_MODE_LANES_CR8		8
#define __LINK_MODE_LANES_DR		1
#define __LINK_MODE_LANES_DR_2		1
#define __LINK_MODE_LANES_DR2		2
#define __LINK_MODE_LANES_DR2_2		2
#define __LINK_MODE_LANES_DR4		4
#define __LINK_MODE_LANES_DR4_2		4
#define __LINK_MODE_LANES_DR8		8
#define __LINK_MODE_LANES_KR		1
#define __LINK_MODE_LANES_KR2		2
#define __LINK_MODE_LANES_KR4		4
#define __LINK_MODE_LANES_KR8		8
#define __LINK_MODE_LANES_SR		1
#define __LINK_MODE_LANES_SR2		2
#define __LINK_MODE_LANES_SR4		4
#define __LINK_MODE_LANES_SR8		8
#define __LINK_MODE_LANES_ER		1
#define __LINK_MODE_LANES_KX		1
#define __LINK_MODE_LANES_KX4		4
#define __LINK_MODE_LANES_LR		1
#define __LINK_MODE_LANES_LR4		4
#define __LINK_MODE_LANES_LR4_ER4	4
#define __LINK_MODE_LANES_LR_ER_FR	1
#define __LINK_MODE_LANES_LR2_ER2_FR2	2
#define __LINK_MODE_LANES_LR4_ER4_FR4	4
#define __LINK_MODE_LANES_LR8_ER8_FR8	8
#define __LINK_MODE_LANES_LRM		1
#define __LINK_MODE_LANES_MLD2		2
#define __LINK_MODE_LANES_T		1
#define __LINK_MODE_LANES_T1		1
#define __LINK_MODE_LANES_X		1
#define __LINK_MODE_LANES_FX		1
#define __LINK_MODE_LANES_T1L		1
#define __LINK_MODE_LANES_T1S		1
#define __LINK_MODE_LANES_T1S_P2MP	1
#define __LINK_MODE_LANES_VR		1
#define __LINK_MODE_LANES_VR2		2
#define __LINK_MODE_LANES_VR4		4
#define __LINK_MODE_LANES_VR8		8
#define __LINK_MODE_LANES_DR8_2		8
#define __LINK_MODE_LANES_T1BRR		1

/*
 * Designated initializer for one regular link mode: speed from SPEED_<speed>,
 * lanes from __LINK_MODE_LANES_<type>, duplex from __DUPLEX_<duplex>.
 */
#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex)	\
	[ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = {		\
		.speed  = SPEED_ ## _speed, \
		.lanes  = __LINK_MODE_LANES_ ## _type, \
		.duplex	= __DUPLEX_ ## _duplex \
	}
/* Map the Half/Full spelling used in the tables to the DUPLEX_* constants */
#define __DUPLEX_Half DUPLEX_HALF
#define __DUPLEX_Full DUPLEX_FULL
#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \
[ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \ .speed = SPEED_UNKNOWN, \ .lanes = 0, \ .duplex = DUPLEX_UNKNOWN, \ } const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T, Half), __DEFINE_LINK_MODE_PARAMS(10, T, Full), __DEFINE_LINK_MODE_PARAMS(100, T, Half), __DEFINE_LINK_MODE_PARAMS(100, T, Full), __DEFINE_LINK_MODE_PARAMS(1000, T, Half), __DEFINE_LINK_MODE_PARAMS(1000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(Autoneg), __DEFINE_SPECIAL_MODE_PARAMS(TP), __DEFINE_SPECIAL_MODE_PARAMS(AUI), __DEFINE_SPECIAL_MODE_PARAMS(MII), __DEFINE_SPECIAL_MODE_PARAMS(FIBRE), __DEFINE_SPECIAL_MODE_PARAMS(BNC), __DEFINE_LINK_MODE_PARAMS(10000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(Pause), __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause), __DEFINE_LINK_MODE_PARAMS(2500, X, Full), __DEFINE_SPECIAL_MODE_PARAMS(Backplane), __DEFINE_LINK_MODE_PARAMS(1000, KX, Full), __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full), __DEFINE_LINK_MODE_PARAMS(10000, KR, Full), [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = { .speed = SPEED_10000, .lanes = 1, .duplex = DUPLEX_FULL, }, __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full), __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full), __DEFINE_LINK_MODE_PARAMS(25000, CR, Full), __DEFINE_LINK_MODE_PARAMS(25000, KR, Full), __DEFINE_LINK_MODE_PARAMS(25000, SR, Full), __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full), __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full), 
__DEFINE_LINK_MODE_PARAMS(1000, X, Full), __DEFINE_LINK_MODE_PARAMS(10000, CR, Full), __DEFINE_LINK_MODE_PARAMS(10000, SR, Full), __DEFINE_LINK_MODE_PARAMS(10000, LR, Full), __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full), __DEFINE_LINK_MODE_PARAMS(10000, ER, Full), __DEFINE_LINK_MODE_PARAMS(2500, T, Full), __DEFINE_LINK_MODE_PARAMS(5000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE), __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS), __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER), __DEFINE_LINK_MODE_PARAMS(50000, KR, Full), __DEFINE_LINK_MODE_PARAMS(50000, SR, Full), __DEFINE_LINK_MODE_PARAMS(50000, CR, Full), __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full), __DEFINE_LINK_MODE_PARAMS(50000, DR, Full), __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, T1, Full), __DEFINE_LINK_MODE_PARAMS(1000, T1, Full), __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), __DEFINE_LINK_MODE_PARAMS(100000, KR, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full), __DEFINE_LINK_MODE_PARAMS(100000, DR, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR, Full), __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full), 
__DEFINE_LINK_MODE_PARAMS(400000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), __DEFINE_LINK_MODE_PARAMS(800000, CR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, KR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full), __DEFINE_LINK_MODE_PARAMS(800000, SR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, VR8, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), __DEFINE_LINK_MODE_PARAMS(200000, CR, Full), __DEFINE_LINK_MODE_PARAMS(200000, KR, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full), __DEFINE_LINK_MODE_PARAMS(200000, SR, Full), __DEFINE_LINK_MODE_PARAMS(200000, VR, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full), __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full), __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full), __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); EXPORT_SYMBOL_GPL(link_mode_params); const char netif_msg_class_names[][ETH_GSTRING_LEN] = { [NETIF_MSG_DRV_BIT] = "drv", [NETIF_MSG_PROBE_BIT] = "probe", [NETIF_MSG_LINK_BIT] = "link", [NETIF_MSG_TIMER_BIT] = "timer", [NETIF_MSG_IFDOWN_BIT] = "ifdown", 
[NETIF_MSG_IFUP_BIT] = "ifup", [NETIF_MSG_RX_ERR_BIT] = "rx_err", [NETIF_MSG_TX_ERR_BIT] = "tx_err", [NETIF_MSG_TX_QUEUED_BIT] = "tx_queued", [NETIF_MSG_INTR_BIT] = "intr", [NETIF_MSG_TX_DONE_BIT] = "tx_done", [NETIF_MSG_RX_STATUS_BIT] = "rx_status", [NETIF_MSG_PKTDATA_BIT] = "pktdata", [NETIF_MSG_HW_BIT] = "hw", [NETIF_MSG_WOL_BIT] = "wol", }; static_assert(ARRAY_SIZE(netif_msg_class_names) == NETIF_MSG_CLASS_COUNT); const char wol_mode_names[][ETH_GSTRING_LEN] = { [const_ilog2(WAKE_PHY)] = "phy", [const_ilog2(WAKE_UCAST)] = "ucast", [const_ilog2(WAKE_MCAST)] = "mcast", [const_ilog2(WAKE_BCAST)] = "bcast", [const_ilog2(WAKE_ARP)] = "arp", [const_ilog2(WAKE_MAGIC)] = "magic", [const_ilog2(WAKE_MAGICSECURE)] = "magicsecure", [const_ilog2(WAKE_FILTER)] = "filter", }; static_assert(ARRAY_SIZE(wol_mode_names) == WOL_MODE_COUNT); const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_TX_HARDWARE)] = "hardware-transmit", [const_ilog2(SOF_TIMESTAMPING_TX_SOFTWARE)] = "software-transmit", [const_ilog2(SOF_TIMESTAMPING_RX_HARDWARE)] = "hardware-receive", [const_ilog2(SOF_TIMESTAMPING_RX_SOFTWARE)] = "software-receive", [const_ilog2(SOF_TIMESTAMPING_SOFTWARE)] = "software-system-clock", [const_ilog2(SOF_TIMESTAMPING_SYS_HARDWARE)] = "hardware-legacy-clock", [const_ilog2(SOF_TIMESTAMPING_RAW_HARDWARE)] = "hardware-raw-clock", [const_ilog2(SOF_TIMESTAMPING_OPT_ID)] = "option-id", [const_ilog2(SOF_TIMESTAMPING_TX_SCHED)] = "sched-transmit", [const_ilog2(SOF_TIMESTAMPING_TX_ACK)] = "ack-transmit", [const_ilog2(SOF_TIMESTAMPING_OPT_CMSG)] = "option-cmsg", [const_ilog2(SOF_TIMESTAMPING_OPT_TSONLY)] = "option-tsonly", [const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats", [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", 
[const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter", [const_ilog2(SOF_TIMESTAMPING_TX_COMPLETION)] = "tx-completion", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); const char ts_tx_type_names[][ETH_GSTRING_LEN] = { [HWTSTAMP_TX_OFF] = "off", [HWTSTAMP_TX_ON] = "on", [HWTSTAMP_TX_ONESTEP_SYNC] = "onestep-sync", [HWTSTAMP_TX_ONESTEP_P2P] = "onestep-p2p", }; static_assert(ARRAY_SIZE(ts_tx_type_names) == __HWTSTAMP_TX_CNT); const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { [HWTSTAMP_FILTER_NONE] = "none", [HWTSTAMP_FILTER_ALL] = "all", [HWTSTAMP_FILTER_SOME] = "some", [HWTSTAMP_FILTER_PTP_V1_L4_EVENT] = "ptpv1-l4-event", [HWTSTAMP_FILTER_PTP_V1_L4_SYNC] = "ptpv1-l4-sync", [HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ] = "ptpv1-l4-delay-req", [HWTSTAMP_FILTER_PTP_V2_L4_EVENT] = "ptpv2-l4-event", [HWTSTAMP_FILTER_PTP_V2_L4_SYNC] = "ptpv2-l4-sync", [HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ] = "ptpv2-l4-delay-req", [HWTSTAMP_FILTER_PTP_V2_L2_EVENT] = "ptpv2-l2-event", [HWTSTAMP_FILTER_PTP_V2_L2_SYNC] = "ptpv2-l2-sync", [HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ] = "ptpv2-l2-delay-req", [HWTSTAMP_FILTER_PTP_V2_EVENT] = "ptpv2-event", [HWTSTAMP_FILTER_PTP_V2_SYNC] = "ptpv2-sync", [HWTSTAMP_FILTER_PTP_V2_DELAY_REQ] = "ptpv2-delay-req", [HWTSTAMP_FILTER_NTP_ALL] = "ntp-all", }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); const char ts_flags_names[][ETH_GSTRING_LEN] = { [const_ilog2(HWTSTAMP_FLAG_BONDED_PHC_INDEX)] = "bonded-phc-index", }; static_assert(ARRAY_SIZE(ts_flags_names) == __HWTSTAMP_FLAG_CNT); const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE] = "vxlan-gpe", }; static_assert(ARRAY_SIZE(udp_tunnel_type_names) == __ETHTOOL_UDP_TUNNEL_TYPE_CNT); /* return false if legacy contained non-0 deprecated fields * maxtxpkt/maxrxpkt. 
rest of ksettings always updated */ bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings) { bool retval = true; memset(link_ksettings, 0, sizeof(*link_ksettings)); /* This is used to tell users that driver is still using these * deprecated legacy fields, and they should not use * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS */ if (legacy_settings->maxtxpkt || legacy_settings->maxrxpkt) retval = false; ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed = ethtool_cmd_speed(legacy_settings); link_ksettings->base.duplex = legacy_settings->duplex; link_ksettings->base.port = legacy_settings->port; link_ksettings->base.phy_address = legacy_settings->phy_address; link_ksettings->base.autoneg = legacy_settings->autoneg; link_ksettings->base.mdio_support = legacy_settings->mdio_support; link_ksettings->base.eth_tp_mdix = legacy_settings->eth_tp_mdix; link_ksettings->base.eth_tp_mdix_ctrl = legacy_settings->eth_tp_mdix_ctrl; return retval; } int __ethtool_get_link(struct net_device *dev) { if (!dev->ethtool_ops->get_link) return -EOPNOTSUPP; return netif_running(dev) && dev->ethtool_ops->get_link(dev); } static int ethtool_get_rxnfc_rule_count(struct net_device *dev) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxnfc info = { .cmd = ETHTOOL_GRXCLSRLCNT, }; int err; err = ops->get_rxnfc(dev, &info, NULL); if (err) return err; return info.rule_cnt; } /* Max offset for one RSS context */ static u32 ethtool_get_rss_ctx_max_channel(struct ethtool_rxfh_context *ctx) { u32 max_ring = 0; u32 i, *tbl; if (WARN_ON_ONCE(!ctx)) return 0; tbl = 
ethtool_rxfh_context_indir(ctx); for (i = 0; i < ctx->indir_size; i++) max_ring = max(max_ring, tbl[i]); return max_ring; } static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxnfc *info; int err, i, rule_cnt; u64 max_ring = 0; if (!ops->get_rxnfc) return -EOPNOTSUPP; rule_cnt = ethtool_get_rxnfc_rule_count(dev); if (rule_cnt <= 0) return -EINVAL; info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL); if (!info) return -ENOMEM; info->cmd = ETHTOOL_GRXCLSRLALL; info->rule_cnt = rule_cnt; err = ops->get_rxnfc(dev, info, info->rule_locs); if (err) goto err_free_info; for (i = 0; i < rule_cnt; i++) { struct ethtool_rxnfc rule_info = { .cmd = ETHTOOL_GRXCLSRULE, .fs.location = info->rule_locs[i], }; err = ops->get_rxnfc(dev, &rule_info, NULL); if (err) goto err_free_info; if (rule_info.fs.ring_cookie != RX_CLS_FLOW_DISC && rule_info.fs.ring_cookie != RX_CLS_FLOW_WAKE && !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) { u64 ring = rule_info.fs.ring_cookie; if (rule_info.flow_type & FLOW_RSS) { struct ethtool_rxfh_context *ctx; ctx = xa_load(&dev->ethtool->rss_ctx, rule_info.rss_context); ring += ethtool_get_rss_ctx_max_channel(ctx); } max_ring = max_t(u64, max_ring, ring); } } kvfree(info); *max = max_ring; return 0; err_free_info: kvfree(info); return err; } /* Max offset across all of a device's RSS contexts */ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) { struct ethtool_rxfh_context *ctx; unsigned long context; u32 max_ring = 0; mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) max_ring = max(max_ring, ethtool_get_rss_ctx_max_channel(ctx)); mutex_unlock(&dev->ethtool->rss_lock); return max_ring; } static u32 ethtool_get_max_rxfh_channel(struct net_device *dev) { struct ethtool_rxfh_param rxfh = {}; u32 dev_size, current_max = 0; int ret; /* While we do track whether RSS context has an indirection 
* table explicitly set by the user, no driver looks at that bit. * Assume drivers won't auto-regenerate the additional tables, * to be safe. */ current_max = ethtool_get_max_rss_ctx_channel(dev); if (!netif_is_rxfh_configured(dev)) return current_max; if (!dev->ethtool_ops->get_rxfh_indir_size || !dev->ethtool_ops->get_rxfh) return current_max; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) return current_max; rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER); if (!rxfh.indir) return U32_MAX; ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); if (ret) { current_max = U32_MAX; goto out_free; } while (dev_size--) current_max = max(current_max, rxfh.indir[dev_size]); out_free: kfree(rxfh.indir); return current_max; } int ethtool_check_max_channel(struct net_device *dev, struct ethtool_channels channels, struct genl_info *info) { u64 max_rxnfc_in_use; u32 max_rxfh_in_use; int max_mp_in_use; /* ensure the new Rx count fits within the configured Rx flow * indirection table/rxnfc settings */ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use)) max_rxnfc_in_use = 0; max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev); if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) { if (info) GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use); return -EINVAL; } if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) { if (info) GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings"); return -EINVAL; } max_mp_in_use = dev_get_min_mp_channel_count(dev); if (channels.combined_count + channels.rx_count <= max_mp_in_use) { if (info) GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing memory provider setting (%d)", max_mp_in_use); return -EINVAL; } return 0; } int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context) { const struct ethtool_ops *ops = dev->ethtool_ops; 
struct ethtool_rxnfc *info; int rc, i, rule_cnt; if (!ops->get_rxnfc) return 0; rule_cnt = ethtool_get_rxnfc_rule_count(dev); if (!rule_cnt) return 0; if (rule_cnt < 0) return -EINVAL; info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL); if (!info) return -ENOMEM; info->cmd = ETHTOOL_GRXCLSRLALL; info->rule_cnt = rule_cnt; rc = ops->get_rxnfc(dev, info, info->rule_locs); if (rc) goto out_free; for (i = 0; i < rule_cnt; i++) { struct ethtool_rxnfc rule_info = { .cmd = ETHTOOL_GRXCLSRULE, .fs.location = info->rule_locs[i], }; rc = ops->get_rxnfc(dev, &rule_info, NULL); if (rc) goto out_free; if (rule_info.fs.flow_type & FLOW_RSS && rule_info.rss_context == rss_context) { rc = -EBUSY; goto out_free; } } out_free: kvfree(info); return rc; } int ethtool_check_ops(const struct ethtool_ops *ops) { if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params)) return -EINVAL; if (WARN_ON(ops->rxfh_max_num_contexts == 1)) return -EINVAL; /* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime, * the fact that ops are checked at registration time does not * mean the ops attached to a netdev later on are sane. 
*/ return 0; } void ethtool_ringparam_get_cfg(struct net_device *dev, struct ethtool_ringparam *param, struct kernel_ethtool_ringparam *kparam, struct netlink_ext_ack *extack) { memset(param, 0, sizeof(*param)); memset(kparam, 0, sizeof(*kparam)); param->cmd = ETHTOOL_GRINGPARAM; dev->ethtool_ops->get_ringparam(dev, param, kparam, extack); /* Driver gives us current state, we want to return current config */ kparam->tcp_data_split = dev->cfg->hds_config; kparam->hds_thresh = dev->cfg->hds_thresh; } static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; info->phc_index = -1; } int ethtool_net_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc) { const struct ethtool_ops *ops = dev->ethtool_ops; int err; if (!ops->get_ts_info) return -ENODEV; /* Does ptp comes from netdev */ ethtool_init_tsinfo(info); info->phc_qualifier = hwprov_desc->qualifier; err = ops->get_ts_info(dev, info); if (err) return err; if (info->phc_index == hwprov_desc->index && net_support_hwtstamp_qualifier(dev, hwprov_desc->qualifier)) return 0; return -ENODEV; } struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc) { int err; /* Only precise qualifier is supported in phydev */ if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) return ERR_PTR(-ENODEV); /* Look in the phy topology */ if (dev->link_topo) { struct phy_device_node *pdn; unsigned long phy_index; xa_for_each(&dev->link_topo->phys, phy_index, pdn) { if (!phy_has_tsinfo(pdn->phy)) continue; ethtool_init_tsinfo(info); err = phy_ts_info(pdn->phy, info); if (err) return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) return pdn->phy; } return ERR_PTR(-ENODEV); } /* Look on the dev->phydev */ if (phy_has_tsinfo(dev->phydev)) { ethtool_init_tsinfo(info); err = 
phy_ts_info(dev->phydev, info); if (err) return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) return dev->phydev; } return ERR_PTR(-ENODEV); } int ethtool_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc) { int err; err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); if (err == -ENODEV) { struct phy_device *phy; phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); if (IS_ERR(phy)) return PTR_ERR(phy); /* Report the phc source only if we have a real * phc source with an index. */ if (info->phc_index >= 0) { info->phc_source = HWTSTAMP_SOURCE_PHYLIB; info->phc_phyindex = phy->phyindex; } err = 0; } else if (!err && info->phc_index >= 0) { info->phc_source = HWTSTAMP_SOURCE_NETDEV; } info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; return err; } int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct hwtstamp_provider *hwprov; int err = 0; rcu_read_lock(); hwprov = rcu_dereference(dev->hwprov); /* No provider specified, use default behavior */ if (!hwprov) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; ethtool_init_tsinfo(info); if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) { err = phy_ts_info(phydev, info); /* Report the phc source only if we have a real * phc source with an index. 
*/ if (!err && info->phc_index >= 0) { info->phc_source = HWTSTAMP_SOURCE_PHYLIB; info->phc_phyindex = phydev->phyindex; } } else if (ops->get_ts_info) { err = ops->get_ts_info(dev, info); if (!err && info->phc_index >= 0) info->phc_source = HWTSTAMP_SOURCE_NETDEV; } info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; rcu_read_unlock(); return err; } err = ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc); rcu_read_unlock(); return err; } bool net_support_hwtstamp_qualifier(struct net_device *dev, enum hwtstamp_provider_qualifier qualifier) { const struct ethtool_ops *ops = dev->ethtool_ops; if (!ops) return false; /* Return true with precise qualifier and with NIC without * qualifier description to not break the old behavior. */ if (!ops->supported_hwtstamp_qualifiers && qualifier == HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) return true; if (ops->supported_hwtstamp_qualifiers & BIT(qualifier)) return true; return false; } int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { struct kernel_ethtool_ts_info info = { }; int num = 0; if (!__ethtool_get_ts_info(dev, &info)) num = ptp_get_vclocks_index(info.phc_index, vclock_index); return num; } EXPORT_SYMBOL(ethtool_get_phc_vclocks); int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info) { return __ethtool_get_ts_info(dev, info); } EXPORT_SYMBOL(ethtool_get_ts_info_by_layer); const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) { ASSERT_RTNL(); ethtool_phy_ops = ops; } EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops); void ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, enum ethtool_link_mode_bit_indices link_mode) { const struct link_mode_info *link_info; if (WARN_ON_ONCE(link_mode >= __ETHTOOL_LINK_MODE_MASK_NBITS)) return; link_info = &link_mode_params[link_mode]; link_ksettings->base.speed = link_info->speed; link_ksettings->lanes = 
link_info->lanes;
	link_ksettings->base.duplex = link_info->duplex;
}
EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode);

/**
 * ethtool_forced_speed_maps_init - build link mode bitmaps for forced speeds
 * @maps: Pointer to an array of Ethtool forced speed map
 * @size: Array size
 *
 * Initialize an array of Ethtool forced speed map to Ethtool link modes. This
 * should be called during driver module init.
 *
 * For every entry the bits listed in @cap_arr are set in @caps; afterwards
 * @cap_arr is cleared to NULL and @arr_size to 0.
 */
void
ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size)
{
	for (u32 i = 0; i < size; i++) {
		struct ethtool_forced_speed_map *map = &maps[i];

		linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps);
		map->cap_arr = NULL;
		map->arr_size = 0;
	}
}
EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init);

/**
 * ethtool_rxfh_context_lost - drop the record of an RSS context
 * @dev: network device that owned the context
 * @context_id: ID of the RSS context to forget
 *
 * Logs a device error, removes @context_id from the device's rss_ctx array
 * and frees the entry.  Warns once if neither RTNL nor the device's rss_lock
 * appears to be held.
 */
void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id)
{
	struct ethtool_rxfh_context *ctx;

	WARN_ONCE(!rtnl_is_locked() &&
		  !lockdep_is_held_type(&dev->ethtool->rss_lock, -1),
		  "RSS context lock assertion failed\n");

	/* context_id is a u32, so print it unsigned */
	netdev_err(dev, "device error, RSS context %u lost\n", context_id);
	ctx = xa_erase(&dev->ethtool->rss_ctx, context_id);
	kfree(ctx);
}
EXPORT_SYMBOL(ethtool_rxfh_context_lost);
1 5 5 5 5 3 5 5 5 5 2 3 3 2 5 6 6 6 6 6 6 6 6 6 6 6 6 23 21 18 15 4 9 11 24 24 24 24 23 2 6 6 13 6 8 14 23 24 9 24 24 24 24 24 24 14 24 25 25 2 2 2 25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 // SPDX-License-Identifier: GPL-2.0 #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/wext.h> #include <net/hotdata.h> #include "dev.h" static void *dev_seq_from_index(struct seq_file *seq, loff_t *pos) { unsigned long ifindex = *pos; struct net_device *dev; for_each_netdev_dump(seq_file_net(seq), dev, ifindex) { 
*pos = dev->ifindex; return dev; } return NULL; } static void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; return dev_seq_from_index(seq, pos); } static void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return dev_seq_from_index(seq, pos); } static void dev_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, stats->rx_fifo_errors, stats->rx_length_errors + stats->rx_over_errors + stats->rx_crc_errors + stats->rx_frame_errors, stats->rx_compressed, stats->multicast, stats->tx_bytes, stats->tx_packets, stats->tx_errors, stats->tx_dropped, stats->tx_fifo_errors, stats->collisions, stats->tx_carrier_errors + stats->tx_aborted_errors + stats->tx_window_errors + stats->tx_heartbeat_errors, stats->tx_compressed); } /* * Called from the PROCfs module. 
This now uses the new arbitrary sized * /proc/net interface to create /proc/net/dev */ static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "Inter-| Receive " " | Transmit\n" " face |bytes packets errs drop fifo frame " "compressed multicast|bytes packets errs " "drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; } static u32 softnet_input_pkt_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->input_pkt_queue); } static u32 softnet_process_queue_len(struct softnet_data *sd) { return skb_queue_len_lockless(&sd->process_queue); } static struct softnet_data *softnet_get_online(loff_t *pos) { struct softnet_data *sd = NULL; while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { sd = &per_cpu(softnet_data, *pos); break; } else ++*pos; return sd; } static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) { return softnet_get_online(pos); } static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return softnet_get_online(pos); } static void softnet_seq_stop(struct seq_file *seq, void *v) { } static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; u32 input_qlen = softnet_input_pkt_queue_len(sd); u32 process_qlen = softnet_process_queue_len(sd); unsigned int flow_limit_count = 0; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit *fl; rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); /* Pairs with WRITE_ONCE() in skb_flow_limit() */ if (fl) flow_limit_count = READ_ONCE(fl->count); rcu_read_unlock(); #endif /* the index is the CPU id owing this sd. 
Since offline CPUs are not
	 * displayed, it would otherwise be non-trivial for user space to map
	 * the data to a specific CPU.
	 */
	seq_printf(seq,
		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
		   "%08x %08x\n",
		   READ_ONCE(sd->processed),
		   atomic_read(&sd->dropped),
		   READ_ONCE(sd->time_squeeze), 0,
		   0, 0, 0, 0, /* was fastroute */
		   0,	/* was cpu_collision */
		   READ_ONCE(sd->received_rps), flow_limit_count,
		   input_qlen + process_qlen, (int)seq->index,
		   input_qlen, process_qlen);
	return 0;
}

/* seq_file iterator over network devices, showing per-device statistics */
static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

/* seq_file iterator over the softnet_data of online CPUs */
static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

/*
 * Return the packet_type at zero-based position @pos, or NULL if @pos is past
 * the last entry.  Entries are counted in this order: each device's
 * ptype_all list, the namespace's ptype_all list, the namespace's
 * ptype_specific list, then every ptype_base[] hash bucket.
 */
static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
	struct list_head *ptype_list = NULL;
	struct packet_type *pt = NULL;
	struct net_device *dev;
	loff_t i = 0;
	int t;

	/* per-device ptype_all lists of this namespace */
	for_each_netdev_rcu(seq_file_net(seq), dev) {
		ptype_list = &dev->ptype_all;
		list_for_each_entry_rcu(pt, ptype_list, list) {
			if (i == pos)
				return pt;
			++i;
		}
	}

	/* namespace-wide ptype_all list */
	list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	/* namespace-wide ptype_specific list */
	list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_specific, list) {
		if (i == pos)
			return pt;
		++i;
	}

	/* ptype_base[] hash buckets */
	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ?
ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; if (pt->dev) { if (nxt != &pt->dev->ptype_all) goto found; dev = pt->dev; for_each_netdev_continue_rcu(seq_file_net(seq), dev) { if (!list_empty(&dev->ptype_all)) { nxt = dev->ptype_all.next; goto found; } } nxt = net->ptype_all.next; goto net_ptype_all; } if (pt->af_packet_net) { net_ptype_all: if (nxt != &net->ptype_all && nxt != &net->ptype_specific) goto found; if (nxt == &net->ptype_all) { /* continue with ->ptype_specific if it's not empty */ nxt = net->ptype_specific.next; if (nxt != &net->ptype_specific) goto found; } hash = 0; nxt = ptype_base[0].next; } else hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } found: return list_entry(nxt, struct packet_type, list); } static void ptype_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ptype_seq_show(struct seq_file *seq, void *v) { struct packet_type *pt = v; if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); seq_printf(seq, " %-8s %ps\n", pt->dev ? 
pt->dev->name : "", pt->func); } return 0; } static const struct seq_operations ptype_seq_ops = { .start = ptype_seq_start, .next = ptype_seq_next, .stop = ptype_seq_stop, .show = ptype_seq_show, }; static int __net_init dev_proc_net_init(struct net *net) { int rc = -ENOMEM; if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops, sizeof(struct seq_net_private))) goto out; if (!proc_create_seq("softnet_stat", 0444, net->proc_net, &softnet_seq_ops)) goto out_dev; if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, sizeof(struct seq_net_private))) goto out_softnet; if (wext_proc_init(net)) goto out_ptype; rc = 0; out: return rc; out_ptype: remove_proc_entry("ptype", net->proc_net); out_softnet: remove_proc_entry("softnet_stat", net->proc_net); out_dev: remove_proc_entry("dev", net->proc_net); goto out; } static void __net_exit dev_proc_net_exit(struct net *net) { wext_proc_exit(net); remove_proc_entry("ptype", net->proc_net); remove_proc_entry("softnet_stat", net->proc_net); remove_proc_entry("dev", net->proc_net); } static struct pernet_operations __net_initdata dev_proc_ops = { .init = dev_proc_net_init, .exit = dev_proc_net_exit, }; static int dev_mc_seq_show(struct seq_file *seq, void *v) { struct netdev_hw_addr *ha; struct net_device *dev = v; if (v == SEQ_START_TOKEN) return 0; netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); } netif_addr_unlock_bh(dev); return 0; } static const struct seq_operations dev_mc_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = dev_mc_seq_show, }; static int __net_init dev_mc_net_init(struct net *net) { if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static void __net_exit dev_mc_net_exit(struct net *net) { remove_proc_entry("dev_mcast", 
net->proc_net);
}

/* Per-namespace setup/teardown of the dev_mcast proc entry */
static struct pernet_operations __net_initdata dev_mc_net_ops = {
	.init = dev_mc_net_init,
	.exit = dev_mc_net_exit,
};

/*
 * Register the per-namespace operations that create the device proc entries
 * (dev_proc_ops) and the dev_mcast entry (dev_mc_net_ops).
 *
 * Returns 0 on success or the error from register_pernet_subsys().  If the
 * second registration fails the first one is undone, so a failure leaves
 * nothing registered.
 */
int __init dev_proc_init(void)
{
	int ret;

	ret = register_pernet_subsys(&dev_proc_ops);
	if (ret)
		return ret;

	ret = register_pernet_subsys(&dev_mc_net_ops);
	if (ret)
		unregister_pernet_subsys(&dev_proc_ops);

	return ret;
}
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 154 154 154 153 154 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2019 Oracle. All Rights Reserved. * Author: Darrick J. Wong <darrick.wong@oracle.com> */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_iwalk.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_health.h" #include "xfs_trans.h" #include "xfs_pwork.h" #include "xfs_ag.h" #include "xfs_bit.h" /* * Walking Inodes in the Filesystem * ================================ * * This iterator function walks a subset of filesystem inodes in increasing * order from @startino until there are no more inodes. For each allocated * inode it finds, it calls a walk function with the relevant inode number and * a pointer to caller-provided data. 
The walk function can return the usual
 * negative error code to stop the iteration; 0 to continue the iteration; or
 * -ECANCELED to stop the iteration.  This return value is returned to the
 * caller.
 *
 * Internally, we allow the walk function to do anything, which means that we
 * cannot maintain the inobt cursor or our lock on the AGI buffer.  We
 * therefore cache the inobt records in kernel memory and only call the walk
 * function when our memory buffer is full.  @nr_recs is the number of records
 * that we've cached, and @sz_recs is the size of our cache.
 *
 * It is the responsibility of the walk function to ensure it accesses
 * allocated inodes, as the inobt records may be stale by the time they are
 * acted upon.
 */
struct xfs_iwalk_ag {
	/*
	 * Parallel work control data.  This is an embedded structure, not a
	 * pointer: the single-threaded entry points in this file initialize
	 * it with XFS_PWORK_SINGLE_THREADED, while xfs_iwalk_threaded()
	 * queues it with xfs_pwork_queue().  The walk code polls it through
	 * xfs_pwork_want_abort().
	 */
	struct xfs_pwork		pwork;

	/* Mount, transaction and per-AG structure used for the current walk. */
	struct xfs_mount		*mp;
	/*
	 * Passed to the AGI read, the inobt cursor and the walk functions.
	 * xfs_iwalk_run_callbacks() cancels it and sets it to NULL before
	 * invoking the callbacks when @drop_trans is set.
	 */
	struct xfs_trans		*tp;
	struct xfs_perag		*pag;

	/* Where do we start the traversal? */
	xfs_ino_t			startino;

	/* What was the last inode number we saw when iterating the inobt? */
	xfs_ino_t			lastino;

	/* Array of inobt records we cache. */
	struct xfs_inobt_rec_incore	*recs;

	/* Number of entries allocated for the @recs array. */
	unsigned int			sz_recs;

	/* Number of entries in the @recs array that are in use. */
	unsigned int			nr_recs;

	/*
	 * Inode walk function and data pointer.  Either callback may be NULL;
	 * xfs_iwalk_ag_recs() checks each one before calling it.  @iwalk_fn
	 * is called once per allocated inode, @inobt_walk_fn once per cached
	 * inobt record.
	 */
	xfs_iwalk_fn			iwalk_fn;
	xfs_inobt_walk_fn		inobt_walk_fn;
	void				*data;

	/*
	 * Make it look like the inodes up to startino are free so that
	 * bulkstat can start its inode iteration at the correct place without
	 * needing to special case everywhere.
	 */
	unsigned int			trim_start:1;

	/* Skip empty inobt records? */
	unsigned int			skip_empty:1;

	/* Drop the (hopefully empty) transaction when calling iwalk_fn. */
	unsigned int			drop_trans:1;
};

/*
 * Loop over all clusters in a chunk for a given incore inode allocation btree
 * record.  Do a readahead if there are any allocated inodes in that cluster.
*/ STATIC void xfs_iwalk_ichunk_ra( struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inobt_rec_incore *irec) { struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_agblock_t agbno; struct blk_plug plug; int i; /* inode chunk index */ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); blk_start_plug(&plug); for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) { xfs_inofree_t imask; imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster); if (imask & ~irec->ir_free) { xfs_buf_readahead(mp->m_ddev_targp, xfs_agbno_to_daddr(pag, agbno), igeo->blocks_per_cluster * mp->m_bsize, &xfs_inode_buf_ops); } agbno += igeo->blocks_per_cluster; } blk_finish_plug(&plug); } /* * Set the bits in @irec's free mask that correspond to the inodes before * @agino so that we skip them. This is how we restart an inode walk that was * interrupted in the middle of an inode record. */ STATIC void xfs_iwalk_adjust_start( xfs_agino_t agino, /* starting inode of chunk */ struct xfs_inobt_rec_incore *irec) /* btree record */ { int idx; /* index into inode chunk */ idx = agino - irec->ir_startino; irec->ir_free |= xfs_inobt_maskn(0, idx); irec->ir_freecount = hweight64(irec->ir_free); } /* Allocate memory for a walk. */ STATIC int xfs_iwalk_alloc( struct xfs_iwalk_ag *iwag) { size_t size; ASSERT(iwag->recs == NULL); iwag->nr_recs = 0; /* Allocate a prefetch buffer for inobt records. */ size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore); iwag->recs = kmalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (iwag->recs == NULL) return -ENOMEM; return 0; } /* Free memory we allocated for a walk. */ STATIC void xfs_iwalk_free( struct xfs_iwalk_ag *iwag) { kfree(iwag->recs); iwag->recs = NULL; } /* For each inuse inode in each cached inobt record, call our function. 
*/ STATIC int xfs_iwalk_ag_recs( struct xfs_iwalk_ag *iwag) { struct xfs_mount *mp = iwag->mp; struct xfs_trans *tp = iwag->tp; struct xfs_perag *pag = iwag->pag; unsigned int i, j; int error; for (i = 0; i < iwag->nr_recs; i++) { struct xfs_inobt_rec_incore *irec = &iwag->recs[i]; trace_xfs_iwalk_ag_rec(pag, irec); if (xfs_pwork_want_abort(&iwag->pwork)) return 0; if (iwag->inobt_walk_fn) { error = iwag->inobt_walk_fn(mp, tp, pag_agno(pag), irec, iwag->data); if (error) return error; } if (!iwag->iwalk_fn) continue; for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { if (xfs_pwork_want_abort(&iwag->pwork)) return 0; /* Skip if this inode is free */ if (XFS_INOBT_MASK(j) & irec->ir_free) continue; /* Otherwise call our function. */ error = iwag->iwalk_fn(mp, tp, xfs_agino_to_ino(pag, irec->ir_startino + j), iwag->data); if (error) return error; } } return 0; } /* Delete cursor and let go of AGI. */ static inline void xfs_iwalk_del_inobt( struct xfs_trans *tp, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp, int error) { if (*curpp) { xfs_btree_del_cursor(*curpp, error); *curpp = NULL; } if (*agi_bpp) { xfs_trans_brelse(tp, *agi_bpp); *agi_bpp = NULL; } } /* * Set ourselves up for walking inobt records starting from a given point in * the filesystem. * * If caller passed in a nonzero start inode number, load the record from the * inobt and make the record look like all the inodes before agino are free so * that we skip them, and then move the cursor to the next inobt record. This * is how we support starting an iwalk in the middle of an inode chunk. * * If the caller passed in a start number of zero, move the cursor to the first * inobt record. * * The caller is responsible for cleaning up the cursor and buffer pointer * regardless of the error status. 
*/ STATIC int xfs_iwalk_ag_start( struct xfs_iwalk_ag *iwag, xfs_agino_t agino, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp, int *has_more) { struct xfs_mount *mp = iwag->mp; struct xfs_trans *tp = iwag->tp; struct xfs_perag *pag = iwag->pag; struct xfs_inobt_rec_incore *irec; int error; /* Set up a fresh cursor and empty the inobt cache. */ iwag->nr_recs = 0; error = xfs_ialloc_read_agi(pag, tp, 0, agi_bpp); if (error) return error; *curpp = xfs_inobt_init_cursor(pag, tp, *agi_bpp); /* Starting at the beginning of the AG? That's easy! */ if (agino == 0) return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more); /* * Otherwise, we have to grab the inobt record where we left off, stuff * the record into our cache, and then see if there are more records. * We require a lookup cache of at least two elements so that the * caller doesn't have to deal with tearing down the cursor to walk the * records. */ error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more); if (error) return error; /* * If the LE lookup at @agino yields no records, jump ahead to the * inobt cursor increment to see if there are more records to process. */ if (!*has_more) goto out_advance; /* Get the record, should always work */ irec = &iwag->recs[iwag->nr_recs]; error = xfs_inobt_get_rec(*curpp, irec, has_more); if (error) return error; if (XFS_IS_CORRUPT(mp, *has_more != 1)) { xfs_btree_mark_sick(*curpp); return -EFSCORRUPTED; } iwag->lastino = xfs_agino_to_ino(pag, irec->ir_startino + XFS_INODES_PER_CHUNK - 1); /* * If the LE lookup yielded an inobt record before the cursor position, * skip it and see if there's another one after it. */ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) goto out_advance; /* * If agino fell in the middle of the inode record, make it look like * the inodes up to agino are free so that we don't return them again. 
*/ if (iwag->trim_start) xfs_iwalk_adjust_start(agino, irec); /* * The prefetch calculation is supposed to give us a large enough inobt * record cache that grab_ichunk can stage a partial first record and * the loop body can cache a record without having to check for cache * space until after it reads an inobt record. */ iwag->nr_recs++; ASSERT(iwag->nr_recs < iwag->sz_recs); out_advance: return xfs_btree_increment(*curpp, 0, has_more); } /* * The inobt record cache is full, so preserve the inobt cursor state and * run callbacks on the cached inobt records. When we're done, restore the * cursor state to wherever the cursor would have been had the cache not been * full (and therefore we could've just incremented the cursor) if *@has_more * is true. On exit, *@has_more will indicate whether or not the caller should * try for more inode records. */ STATIC int xfs_iwalk_run_callbacks( struct xfs_iwalk_ag *iwag, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp, int *has_more) { struct xfs_mount *mp = iwag->mp; xfs_agino_t next_agino; int error; next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1; ASSERT(iwag->nr_recs > 0); /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(iwag->tp, curpp, agi_bpp, 0); ASSERT(next_agino >= iwag->recs[iwag->nr_recs - 1].ir_startino + XFS_INODES_PER_CHUNK); if (iwag->drop_trans) { xfs_trans_cancel(iwag->tp); iwag->tp = NULL; } error = xfs_iwalk_ag_recs(iwag); if (error) return error; /* ...empty the cache... */ iwag->nr_recs = 0; if (!has_more) return 0; if (iwag->drop_trans) { error = xfs_trans_alloc_empty(mp, &iwag->tp); if (error) return error; } /* ...and recreate the cursor just past where we left off. 
*/ error = xfs_ialloc_read_agi(iwag->pag, iwag->tp, 0, agi_bpp); if (error) return error; *curpp = xfs_inobt_init_cursor(iwag->pag, iwag->tp, *agi_bpp); return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more); } /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ STATIC int xfs_iwalk_ag( struct xfs_iwalk_ag *iwag) { struct xfs_mount *mp = iwag->mp; struct xfs_perag *pag = iwag->pag; struct xfs_buf *agi_bp = NULL; struct xfs_btree_cur *cur = NULL; xfs_agino_t agino; int has_more; int error = 0; /* Set up our cursor at the right place in the inode btree. */ ASSERT(pag_agno(pag) == XFS_INO_TO_AGNO(mp, iwag->startino)); agino = XFS_INO_TO_AGINO(mp, iwag->startino); error = xfs_iwalk_ag_start(iwag, agino, &cur, &agi_bp, &has_more); while (!error && has_more) { struct xfs_inobt_rec_incore *irec; xfs_ino_t rec_fsino; cond_resched(); if (xfs_pwork_want_abort(&iwag->pwork)) goto out; /* Fetch the inobt record. */ irec = &iwag->recs[iwag->nr_recs]; error = xfs_inobt_get_rec(cur, irec, &has_more); if (error || !has_more) break; /* Make sure that we always move forward. */ rec_fsino = xfs_agino_to_ino(pag, irec->ir_startino); if (iwag->lastino != NULLFSINO && XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) { xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1; /* No allocated inodes in this chunk; skip it. */ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { error = xfs_btree_increment(cur, 0, &has_more); if (error) break; continue; } /* * Start readahead for this inode chunk in anticipation of * walking the inodes. */ if (iwag->iwalk_fn) xfs_iwalk_ichunk_ra(mp, pag, irec); /* * If there's space in the buffer for more records, increment * the btree cursor and grab more. 
*/ if (++iwag->nr_recs < iwag->sz_recs) { error = xfs_btree_increment(cur, 0, &has_more); if (error || !has_more) break; continue; } /* * Otherwise, we need to save cursor state and run the callback * function on the cached records. The run_callbacks function * is supposed to return a cursor pointing to the record where * we would be if we had been able to increment like above. */ ASSERT(has_more); error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more); } if (iwag->nr_recs == 0 || error) goto out; /* Walk the unprocessed records in the cache. */ error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more); out: xfs_iwalk_del_inobt(iwag->tp, &cur, &agi_bp, error); return error; } /* * We experimentally determined that the reduction in ioctl call overhead * diminishes when userspace asks for more than 2048 inodes, so we'll cap * prefetch at this point. */ #define IWALK_MAX_INODE_PREFETCH (2048U) /* * Given the number of inodes to prefetch, set the number of inobt records that * we cache in memory, which controls the number of inodes we try to read * ahead. Set the maximum if @inodes == 0. */ static inline unsigned int xfs_iwalk_prefetch( unsigned int inodes) { unsigned int inobt_records; /* * If the caller didn't tell us the number of inodes they wanted, * assume the maximum prefetch possible for best performance. * Otherwise, cap prefetch at that maximum so that we don't start an * absurd amount of prefetch. */ if (inodes == 0) inodes = IWALK_MAX_INODE_PREFETCH; inodes = min(inodes, IWALK_MAX_INODE_PREFETCH); /* Round the inode count up to a full chunk. */ inodes = round_up(inodes, XFS_INODES_PER_CHUNK); /* * In order to convert the number of inodes to prefetch into an * estimate of the number of inobt records to cache, we require a * conversion factor that reflects our expectations of the average * loading factor of an inode chunk. 
Based on data gathered, most * (but not all) filesystems manage to keep the inode chunks totally * full, so we'll underestimate slightly so that our readahead will * still deliver the performance we want on aging filesystems: * * inobt = inodes / (INODES_PER_CHUNK * (4 / 5)); * * The funny math is to avoid integer division. */ inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK); /* * Allocate enough space to prefetch at least two inobt records so that * we can cache both the record where the iwalk started and the next * record. This simplifies the AG inode walk loop setup code. */ return max(inobt_records, 2U); } static int xfs_iwalk_args( struct xfs_iwalk_ag *iwag, unsigned int flags) { struct xfs_mount *mp = iwag->mp; xfs_agnumber_t start_agno; int error; start_agno = XFS_INO_TO_AGNO(iwag->mp, iwag->startino); ASSERT(start_agno < iwag->mp->m_sb.sb_agcount); ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); error = xfs_iwalk_alloc(iwag); if (error) return error; while ((iwag->pag = xfs_perag_next_from(mp, iwag->pag, start_agno))) { error = xfs_iwalk_ag(iwag); if (error || (flags & XFS_IWALK_SAME_AG)) { xfs_perag_rele(iwag->pag); break; } iwag->startino = XFS_AGINO_TO_INO(mp, pag_agno(iwag->pag) + 1, 0); } xfs_iwalk_free(iwag); return error; } /* * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn * will be called for each allocated inode, being passed the inode's number and * @data. @max_prefetch controls how many inobt records' worth of inodes we * try to readahead. */ int xfs_iwalk( struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, unsigned int flags, xfs_iwalk_fn iwalk_fn, unsigned int inode_records, void *data) { struct xfs_iwalk_ag iwag = { .mp = mp, .tp = tp, .iwalk_fn = iwalk_fn, .data = data, .startino = startino, .sz_recs = xfs_iwalk_prefetch(inode_records), .trim_start = 1, .skip_empty = 1, .pwork = XFS_PWORK_SINGLE_THREADED, .lastino = NULLFSINO, }; return xfs_iwalk_args(&iwag, flags); } /* Run per-thread iwalk work. 
*/ static int xfs_iwalk_ag_work( struct xfs_mount *mp, struct xfs_pwork *pwork) { struct xfs_iwalk_ag *iwag; int error = 0; iwag = container_of(pwork, struct xfs_iwalk_ag, pwork); if (xfs_pwork_want_abort(pwork)) goto out; error = xfs_iwalk_alloc(iwag); if (error) goto out; /* * Grab an empty transaction so that we can use its recursive buffer * locking abilities to detect cycles in the inobt without deadlocking. */ error = xfs_trans_alloc_empty(mp, &iwag->tp); if (error) goto out; iwag->drop_trans = 1; error = xfs_iwalk_ag(iwag); if (iwag->tp) xfs_trans_cancel(iwag->tp); xfs_iwalk_free(iwag); out: xfs_perag_put(iwag->pag); kfree(iwag); return error; } /* * Walk all the inodes in the filesystem using multiple threads to process each * AG. */ int xfs_iwalk_threaded( struct xfs_mount *mp, xfs_ino_t startino, unsigned int flags, xfs_iwalk_fn iwalk_fn, unsigned int inode_records, bool polled, void *data) { xfs_agnumber_t start_agno = XFS_INO_TO_AGNO(mp, startino); struct xfs_pwork_ctl pctl; struct xfs_perag *pag = NULL; int error; ASSERT(start_agno < mp->m_sb.sb_agcount); ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk"); if (error) return error; while ((pag = xfs_perag_next_from(mp, pag, start_agno))) { struct xfs_iwalk_ag *iwag; if (xfs_pwork_ctl_want_abort(&pctl)) break; iwag = kzalloc(sizeof(struct xfs_iwalk_ag), GFP_KERNEL | __GFP_NOFAIL); iwag->mp = mp; /* * perag is being handed off to async work, so take a passive * reference for the async work to release. 
*/ iwag->pag = xfs_perag_hold(pag); iwag->iwalk_fn = iwalk_fn; iwag->data = data; iwag->startino = startino; iwag->sz_recs = xfs_iwalk_prefetch(inode_records); iwag->lastino = NULLFSINO; xfs_pwork_queue(&pctl, &iwag->pwork); startino = XFS_AGINO_TO_INO(mp, pag_agno(pag) + 1, 0); if (flags & XFS_IWALK_SAME_AG) break; } if (pag) xfs_perag_rele(pag); if (polled) xfs_pwork_poll(&pctl); return xfs_pwork_destroy(&pctl); } /* * Allow callers to cache up to a page's worth of inobt records. This reflects * the existing inumbers prefetching behavior. Since the inobt walk does not * itself do anything with the inobt records, we can set a fairly high limit * here. */ #define MAX_INOBT_WALK_PREFETCH \ (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore)) /* * Given the number of records that the user wanted, set the number of inobt * records that we buffer in memory. Set the maximum if @inobt_records == 0. */ static inline unsigned int xfs_inobt_walk_prefetch( unsigned int inobt_records) { /* * If the caller didn't tell us the number of inobt records they * wanted, assume the maximum prefetch possible for best performance. */ if (inobt_records == 0) inobt_records = MAX_INOBT_WALK_PREFETCH; /* * Allocate enough space to prefetch at least two inobt records so that * we can cache both the record where the iwalk started and the next * record. This simplifies the AG inode walk loop setup code. */ inobt_records = max(inobt_records, 2U); /* * Cap prefetch at that maximum so that we don't use an absurd amount * of memory. */ return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH); } /* * Walk all inode btree records in the filesystem starting from @startino. The * @inobt_walk_fn will be called for each btree record, being passed the incore * record and @data. @max_prefetch controls how many inobt records we try to * cache ahead of time. 
*/ int xfs_inobt_walk( struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, unsigned int flags, xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records, void *data) { struct xfs_iwalk_ag iwag = { .mp = mp, .tp = tp, .inobt_walk_fn = inobt_walk_fn, .data = data, .startino = startino, .sz_recs = xfs_inobt_walk_prefetch(inobt_records), .pwork = XFS_PWORK_SINGLE_THREADED, .lastino = NULLFSINO, }; return xfs_iwalk_args(&iwag, flags); }
8 8 8 8 15 15 15 15 14 14 14 14 6 6 6 6 14 2 7 2 4 15 15 15 3 1 3 14 15 9 3 6 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_basic.c Basic Packet Classifier. 
* * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/idr.h> #include <linux/percpu.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> struct basic_head { struct list_head flist; struct idr handle_idr; struct rcu_head rcu; }; struct basic_filter { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_result res; struct tcf_proto *tp; struct list_head link; struct tc_basic_pcnt __percpu *pf; struct rcu_work rwork; }; TC_INDIRECT_SCOPE int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; struct basic_head *head = rcu_dereference_bh(tp->root); struct basic_filter *f; list_for_each_entry_rcu(f, &head->flist, link) { __this_cpu_inc(f->pf->rcnt); if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; __this_cpu_inc(f->pf->rhit); *res = f->res; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } return -1; } static void *basic_get(struct tcf_proto *tp, u32 handle) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { return f; } } return NULL; } static int basic_init(struct tcf_proto *tp) { struct basic_head *head; head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } static void __basic_delete_filter(struct basic_filter *f) { tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); free_percpu(f->pf); kfree(f); } static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(to_rcu_work(work), struct basic_filter, 
rwork); rtnl_lock(); __basic_delete_filter(f); rtnl_unlock(); } static void basic_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, basic_delete_filter_work); else __basic_delete_filter(f); } idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, basic_delete_filter_work); *last = list_empty(&head->flist); return 0; } static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_CLASSID] = { .type = NLA_U32 }, [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches); if (err < 0) return err; if (tb[TCA_BASIC_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]); tcf_bind_filter(tp, &f->res, base); } f->tp = tp; return 0; } static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_BASIC_MAX + 1]; struct 
basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], basic_policy, NULL); if (err < 0) return err; if (fold != NULL) { if (handle && fold->handle != handle) return -EINVAL; } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); if (!fnew) return -ENOBUFS; err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) goto errout; if (!handle) { handle = 1; err = idr_alloc_u32(&head->handle_idr, fnew, &handle, INT_MAX, GFP_KERNEL); } else if (!fold) { err = idr_alloc_u32(&head->handle_idr, fnew, &handle, handle, GFP_KERNEL); } if (err) goto errout; fnew->handle = handle; fnew->pf = alloc_percpu(struct tc_basic_pcnt); if (!fnew->pf) { err = -ENOMEM; goto errout; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags, extack); if (err < 0) { if (!fold) idr_remove(&head->handle_idr, fnew->handle); goto errout; } *arg = fnew; if (fold) { idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); tcf_queue_work(&fold->rwork, basic_delete_filter_work); } else { list_add_rcu(&fnew->link, &head->flist); } return 0; errout: free_percpu(fnew->pf); tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (!tc_cls_stats_dump(tp, arg, f)) break; } } static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, unsigned long base) { struct basic_filter *f = fh; tc_cls_bind_class(classid, cl, q, &f->res, base); } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter 
*f = fh; struct nlattr *nest; int cpu; if (f == NULL) return skb->len; t->tcm_handle = f->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (f->res.classid && nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; for_each_possible_cpu(cpu) { struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu); gpf.rcnt += pf->rcnt; gpf.rhit += pf->rhit; } if (nla_put_64bit(skb, TCA_BASIC_PCNT, sizeof(struct tc_basic_pcnt), &gpf, TCA_BASIC_PAD)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_basic_ops __read_mostly = { .kind = "basic", .classify = basic_classify, .init = basic_init, .destroy = basic_destroy, .get = basic_get, .change = basic_change, .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, .bind_class = basic_bind_class, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("basic"); static int __init init_basic(void) { return register_tcf_proto_ops(&cls_basic_ops); } static void __exit exit_basic(void) { unregister_tcf_proto_ops(&cls_basic_ops); } module_init(init_basic) module_exit(exit_basic) MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL");
2 1 3 3 1 1 1 1 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 // SPDX-License-Identifier: GPL-2.0-or-later /* * SQ905C subdriver * * Copyright (C) 2009 Theodore Kilgore */ /* * * This driver uses work done in * libgphoto2/camlibs/digigr8, Copyright (C) Theodore Kilgore. * * This driver has also used as a base the sq905c driver * and may contain code fragments from it. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sq905c" #include <linux/workqueue.h> #include <linux/slab.h> #include "gspca.h" MODULE_AUTHOR("Theodore Kilgore <kilgota@auburn.edu>"); MODULE_DESCRIPTION("GSPCA/SQ905C USB Camera Driver"); MODULE_LICENSE("GPL"); /* Default timeouts, in ms */ #define SQ905C_CMD_TIMEOUT 500 #define SQ905C_DATA_TIMEOUT 1000 /* Maximum transfer size to use. 
 */
#define SQ905C_MAX_TRANSFER 0x8000

/* Length in bytes of the header requested at the start of each frame. */
#define FRAME_HEADER_LEN 0x50

/* Commands. These go in the "value" slot. */
#define SQ905C_CLEAR 0xa0 /* clear everything */
#define SQ905C_GET_ID 0x14f4 /* Read version number */
#define SQ905C_CAPTURE_LOW 0xa040 /* Starts capture at 160x120 */
#define SQ905C_CAPTURE_MED 0x1440 /* Starts capture at 320x240 */
/*
 * NOTE(review): the original comment here also said 320x240, duplicating
 * SQ905C_CAPTURE_MED.  Presumably this starts capture at 640x480, the other
 * size listed in sq905c_mode[] - confirm against the hardware.
 */
#define SQ905C_CAPTURE_HI 0x2840 /* Starts capture at 640x480 (presumed) */

/* For capture, this must go in the "index" slot. */
#define SQ905C_CAPTURE_INDEX 0x110f

/* Structure to hold all of our device specific stuff */
struct sd {
	struct gspca_dev gspca_dev; /* !! must be the first item */
	const struct v4l2_pix_format *cap_mode;
	/* Driver stuff */
	struct work_struct work_struct;
	struct workqueue_struct *work_thread;
};

/*
 * Most of these cameras will do 640x480 and 320x240. 160x120 works
 * in theory but gives very poor output. Therefore, not supported.
 * The 0x2770:0x9050 cameras have max resolution of 320x240.
 *
 * Entries are ordered from the smaller to the larger frame size.
 */
static struct v4l2_pix_format sq905c_mode[] = {
	{ 320, 240, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0},
	{ 640, 480, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480,
		.colorspace = V4L2_COLORSPACE_SRGB,
		.priv = 0}
};

/* Send a command to the camera.
*/ static int sq905c_command(struct gspca_dev *gspca_dev, u16 command, u16 index) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, NULL, 0, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } static int sq905c_read(struct gspca_dev *gspca_dev, u16 command, u16 index, int size) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, gspca_dev->usb_buf, size, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the camera doesn't provide any controls. */ static void sq905c_dostream(struct work_struct *work) { struct sd *dev = container_of(work, struct sd, work_struct); struct gspca_dev *gspca_dev = &dev->gspca_dev; int bytes_left; /* bytes remaining in current frame. */ int data_len; /* size to use for the next read. 
*/ int act_len; int packet_type; int ret; u8 *buffer; buffer = kmalloc(SQ905C_MAX_TRANSFER, GFP_KERNEL); if (!buffer) { pr_err("Couldn't allocate USB buffer\n"); goto quit_stream; } while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif /* Request the header, which tells the size to download */ ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, FRAME_HEADER_LEN, &act_len, SQ905C_DATA_TIMEOUT); gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for header\n", act_len, FRAME_HEADER_LEN); if (ret < 0 || act_len < FRAME_HEADER_LEN) goto quit_stream; /* size is read from 4 bytes starting 0x40, little endian */ bytes_left = buffer[0x40]|(buffer[0x41]<<8)|(buffer[0x42]<<16) |(buffer[0x43]<<24); gspca_dbg(gspca_dev, D_STREAM, "bytes_left = 0x%x\n", bytes_left); /* We keep the header. It has other information, too. */ packet_type = FIRST_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, FRAME_HEADER_LEN); while (bytes_left > 0 && gspca_dev->present) { data_len = bytes_left > SQ905C_MAX_TRANSFER ? 
SQ905C_MAX_TRANSFER : bytes_left; ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, data_len, &act_len, SQ905C_DATA_TIMEOUT); if (ret < 0 || act_len < data_len) goto quit_stream; gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for frame\n", data_len, bytes_left); bytes_left -= data_len; if (bytes_left == 0) packet_type = LAST_PACKET; else packet_type = INTER_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, data_len); } } quit_stream: if (gspca_dev->present) { mutex_lock(&gspca_dev->usb_lock); sq905c_command(gspca_dev, SQ905C_CLEAR, 0); mutex_unlock(&gspca_dev->usb_lock); } kfree(buffer); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam = &gspca_dev->cam; struct sd *dev = (struct sd *) gspca_dev; int ret; gspca_dbg(gspca_dev, D_PROBE, "SQ9050 camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); ret = sq905c_command(gspca_dev, SQ905C_GET_ID, 0); if (ret < 0) { gspca_err(gspca_dev, "Get version command failed\n"); return ret; } ret = sq905c_read(gspca_dev, 0xf5, 0, 20); if (ret < 0) { gspca_err(gspca_dev, "Reading version command failed\n"); return ret; } /* Note we leave out the usb id and the manufacturing date */ gspca_dbg(gspca_dev, D_PROBE, "SQ9050 ID string: %02x - %*ph\n", gspca_dev->usb_buf[3], 6, gspca_dev->usb_buf + 14); cam->cam_mode = sq905c_mode; cam->nmodes = 2; if (gspca_dev->usb_buf[15] == 0) cam->nmodes = 1; /* We don't use the buffer gspca allocates so make it small. 
*/ cam->bulk_size = 32; cam->bulk = 1; INIT_WORK(&dev->work_struct, sq905c_dostream); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); /* This waits for sq905c_dostream to finish */ destroy_workqueue(dev->work_thread); dev->work_thread = NULL; mutex_lock(&gspca_dev->usb_lock); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { /* connect to the camera and reset it. */ return sq905c_command(gspca_dev, SQ905C_CLEAR, 0); } /* Set up for getting frames. */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; int ret; dev->cap_mode = gspca_dev->cam.cam_mode; /* "Open the shutter" and set size, to start capture */ switch (gspca_dev->pixfmt.width) { case 640: gspca_dbg(gspca_dev, D_STREAM, "Start streaming at high resolution\n"); dev->cap_mode++; ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_HI, SQ905C_CAPTURE_INDEX); break; default: /* 320 */ gspca_dbg(gspca_dev, D_STREAM, "Start streaming at medium resolution\n"); ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_MED, SQ905C_CAPTURE_INDEX); } if (ret < 0) { gspca_err(gspca_dev, "Start streaming command failed\n"); return ret; } /* Start the workqueue function to do the streaming */ dev->work_thread = create_singlethread_workqueue(MODULE_NAME); if (!dev->work_thread) return -ENOMEM; queue_work(dev->work_thread, &dev->work_struct); return 0; } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x2770, 0x905c)}, {USB_DEVICE(0x2770, 0x9050)}, {USB_DEVICE(0x2770, 0x9051)}, {USB_DEVICE(0x2770, 0x9052)}, {USB_DEVICE(0x2770, 0x913d)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc 
= { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
11 11 200 115 510 154 130 26 157 1 155 26 125 147 147 24 123 155 1 156 153 153 17 17 17 17 17 15 17 174 155 17 371 199 172 371 369 1 366 363 184 188 365 3 3 99 8 97 98 63 36 27 27 16 11 11 27 27 6 24 101 101 101 101 36 10 55 18 45 79 21 99 21 79 99 63 36 17 17 1 15 27 16 11 4992 4987 4985 4995 4995 6 6 6 6 6 6 26 26 26 28 28 2 7 19 8 8 8 3082 3085 3082 3083 3 3087 1314 3085 6694 2975 6692 3054 6692 6695 6632 64 6685 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/file.c - kernfs file implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/fsnotify.h> #include <linux/uio.h> #include "kernfs-internal.h" struct kernfs_open_node { struct rcu_head rcu_head; atomic_t event; wait_queue_head_t poll; struct list_head files; /* goes through kernfs_open_file.list */ unsigned int nr_mmapped; unsigned int nr_to_release; }; /* * kernfs_notify() may be called from any context and bounces notifications * through a work item. To minimize space overhead in kernfs_node, the * pending queue is implemented as a singly linked list of kernfs_nodes. * The list is terminated with the self pointer so that whether a * kernfs_node is on the list or not can be determined by testing the next * pointer for %NULL. 
*/ #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) static DEFINE_SPINLOCK(kernfs_notify_lock); static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) { int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS); return &kernfs_locks->open_file_mutex[idx]; } static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) { struct mutex *lock; lock = kernfs_open_file_mutex_ptr(kn); mutex_lock(lock); return lock; } /** * of_on - Get the kernfs_open_node of the specified kernfs_open_file * @of: target kernfs_open_file * * Return: the kernfs_open_node of the kernfs_open_file */ static struct kernfs_open_node *of_on(struct kernfs_open_file *of) { return rcu_dereference_protected(of->kn->attr.open, !list_empty(&of->list)); } /** * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn * * @kn: target kernfs_node. * * Fetch and return ->attr.open of @kn when caller holds the * kernfs_open_file_mutex_ptr(kn). * * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when * the caller guarantees that this mutex is being held, other updaters can't * change ->attr.open and this means that we can safely deref ->attr.open * outside RCU read-side critical section. * * The caller needs to make sure that kernfs_open_file_mutex is held. * * Return: @kn->attr.open when kernfs_open_file_mutex is held. */ static struct kernfs_open_node * kernfs_deref_open_node_locked(struct kernfs_node *kn) { return rcu_dereference_protected(kn->attr.open, lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); } static struct kernfs_open_file *kernfs_of(struct file *file) { return ((struct seq_file *)file->private_data)->private; } /* * Determine the kernfs_ops for the given kernfs_node. This function must * be called while holding an active reference. 
*/ static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) { if (kn->flags & KERNFS_LOCKDEP) lockdep_assert_held(kn); return kn->attr.ops; } /* * As kernfs_seq_stop() is also called after kernfs_seq_start() or * kernfs_seq_next() failure, it needs to distinguish whether it's stopping * a seq_file iteration which is fully initialized with an active reference * or an aborted kernfs_seq_start() due to get_active failure. The * position pointer is the only context for each seq_file iteration and * thus the stop condition should be encoded in it. As the return value is * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable * choice to indicate get_active failure. * * Unfortunately, this is complicated due to the optional custom seq_file * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or * custom seq_file operations and thus can't decide whether put_active * should be performed or not only on ERR_PTR(-ENODEV). * * This is worked around by factoring out the custom seq_stop() and * put_active part into kernfs_seq_stop_active(), skipping it from * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures * that kernfs_seq_stop_active() is skipped only after get_active failure. */ static void kernfs_seq_stop_active(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_stop) ops->seq_stop(sf, v); kernfs_put_active(of->kn); } static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. 
*/ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) return ERR_PTR(-ENODEV); ops = kernfs_ops(of->kn); if (ops->seq_start) { void *next = ops->seq_start(sf, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } return single_start(sf, ppos); } static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) { struct kernfs_open_file *of = sf->private; const struct kernfs_ops *ops = kernfs_ops(of->kn); if (ops->seq_next) { void *next = ops->seq_next(sf, v, ppos); /* see the comment above kernfs_seq_stop_active() */ if (next == ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, next); return next; } else { /* * The same behavior and code as single_open(), always * terminate after the initial read. */ ++*ppos; return NULL; } } static void kernfs_seq_stop(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; if (v != ERR_PTR(-ENODEV)) kernfs_seq_stop_active(sf, v); mutex_unlock(&of->mutex); } static int kernfs_seq_show(struct seq_file *sf, void *v) { struct kernfs_open_file *of = sf->private; of->event = atomic_read(&of_on(of)->event); return of->kn->attr.ops->seq_show(sf, v); } static const struct seq_operations kernfs_seq_ops = { .start = kernfs_seq_start, .next = kernfs_seq_next, .stop = kernfs_seq_stop, .show = kernfs_seq_show, }; /* * As reading a bin file can have side-effects, the exact offset and bytes * specified in read(2) call should be passed to the read callback making * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. 
*/ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; char *buf; buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len, GFP_KERNEL); if (!buf) return -ENOMEM; /* * @of->mutex nests outside active ref and is used both to ensure that * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { len = -ENODEV; mutex_unlock(&of->mutex); goto out_free; } of->event = atomic_read(&of_on(of)->event); ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len < 0) goto out_free; if (copy_to_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) { if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) return seq_read_iter(iocb, iter); return kernfs_file_read_iter(iocb, iter); } /* * Copy data in from userland and pass it to the matching kernfs write * operation. * * There is no easy way for us to know if userspace is only doing a partial * write, so we don't support them. We expect the entire buffer to come on * the first write. Hint: if you're writing a value, first read the file, * modify only the value you're changing, then write entire buffer * back. 
*/ static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) { struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); ssize_t len = iov_iter_count(iter); const struct kernfs_ops *ops; char *buf; if (of->atomic_write_len) { if (len > of->atomic_write_len) return -E2BIG; } else { len = min_t(size_t, len, PAGE_SIZE); } buf = of->prealloc_buf; if (buf) mutex_lock(&of->prealloc_mutex); else buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; if (copy_from_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } buf[len] = '\0'; /* guarantee string termination */ /* * @of->mutex nests outside active ref and is used both to ensure that * the ops aren't called concurrently for the same open file. */ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); len = -ENODEV; goto out_free; } ops = kernfs_ops(of->kn); if (ops->write) len = ops->write(of, buf, len, iocb->ki_pos); else len = -EINVAL; kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len > 0) iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) mutex_unlock(&of->prealloc_mutex); else kfree(buf); return len; } static void kernfs_vma_open(struct vm_area_struct *vma) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); if (!of->vm_ops) return; if (!kernfs_get_active(of->kn)) return; if (of->vm_ops->open) of->vm_ops->open(vma); kernfs_put_active(of->kn); } static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) return VM_FAULT_SIGBUS; if (!kernfs_get_active(of->kn)) return VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS; if (of->vm_ops->fault) ret = of->vm_ops->fault(vmf); kernfs_put_active(of->kn); return ret; } static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); vm_fault_t ret; if (!of->vm_ops) 
return VM_FAULT_SIGBUS; if (!kernfs_get_active(of->kn)) return VM_FAULT_SIGBUS; ret = 0; if (of->vm_ops->page_mkwrite) ret = of->vm_ops->page_mkwrite(vmf); else file_update_time(file); kernfs_put_active(of->kn); return ret; } static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) { struct file *file = vma->vm_file; struct kernfs_open_file *of = kernfs_of(file); int ret; if (!of->vm_ops) return -EINVAL; if (!kernfs_get_active(of->kn)) return -EINVAL; ret = -EINVAL; if (of->vm_ops->access) ret = of->vm_ops->access(vma, addr, buf, len, write); kernfs_put_active(of->kn); return ret; } static const struct vm_operations_struct kernfs_vm_ops = { .open = kernfs_vma_open, .fault = kernfs_vma_fault, .page_mkwrite = kernfs_vma_page_mkwrite, .access = kernfs_vma_access, }; static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; int rc; /* * mmap path and of->mutex are prone to triggering spurious lockdep * warnings and we don't want to add spurious locking dependency * between the two. Check whether mmap is actually implemented * without grabbing @of->mutex by testing HAS_MMAP flag. See the * comment in kernfs_fop_open() for more details. */ if (!(of->kn->flags & KERNFS_HAS_MMAP)) return -ENODEV; mutex_lock(&of->mutex); rc = -ENODEV; if (!kernfs_get_active(of->kn)) goto out_unlock; ops = kernfs_ops(of->kn); rc = ops->mmap(of, vma); if (rc) goto out_put; /* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() * to satisfy versions of X which crash if the mmap fails: that * substitutes a new vm_file, and we don't then want bin_vm_ops. */ if (vma->vm_file != file) goto out_put; rc = -EINVAL; if (of->mmapped && of->vm_ops != vma->vm_ops) goto out_put; /* * It is not possible to successfully wrap close. * So error if someone is trying to use close. 
*/ if (vma->vm_ops && vma->vm_ops->close) goto out_put; rc = 0; if (!of->mmapped) { of->mmapped = true; of_on(of)->nr_mmapped++; of->vm_ops = vma->vm_ops; } vma->vm_ops = &kernfs_vm_ops; out_put: kernfs_put_active(of->kn); out_unlock: mutex_unlock(&of->mutex); return rc; } /** * kernfs_get_open_node - get or create kernfs_open_node * @kn: target kernfs_node * @of: kernfs_open_file for this instance of open * * If @kn->attr.open exists, increment its reference count; otherwise, * create one. @of is chained to the files list. * * Locking: * Kernel thread context (may sleep). * * Return: * %0 on success, -errno on failure. */ static int kernfs_get_open_node(struct kernfs_node *kn, struct kernfs_open_file *of) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { /* not there, initialize a new one */ on = kzalloc(sizeof(*on), GFP_KERNEL); if (!on) { mutex_unlock(mutex); return -ENOMEM; } atomic_set(&on->event, 1); init_waitqueue_head(&on->poll); INIT_LIST_HEAD(&on->files); rcu_assign_pointer(kn->attr.open, on); } list_add_tail(&of->list, &on->files); if (kn->flags & KERNFS_HAS_RELEASE) on->nr_to_release++; mutex_unlock(mutex); return 0; } /** * kernfs_unlink_open_file - Unlink @of from @kn. * * @kn: target kernfs_node * @of: associated kernfs_open_file * @open_failed: ->open() failed, cancel ->release() * * Unlink @of from list of @kn's associated open files. If list of * associated open files becomes empty, disassociate and free * kernfs_open_node. * * LOCKING: * None. 
*/ static void kernfs_unlink_open_file(struct kernfs_node *kn, struct kernfs_open_file *of, bool open_failed) { struct kernfs_open_node *on; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } if (of) { if (kn->flags & KERNFS_HAS_RELEASE) { WARN_ON_ONCE(of->released == open_failed); if (open_failed) on->nr_to_release--; } if (of->mmapped) on->nr_mmapped--; list_del(&of->list); } if (list_empty(&on->files)) { rcu_assign_pointer(kn->attr.open, NULL); kfree_rcu(on, rcu_head); } mutex_unlock(mutex); } static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = inode->i_private; struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; int error = -EACCES; if (!kernfs_get_active(kn)) return -ENODEV; ops = kernfs_ops(kn); has_read = ops->seq_show || ops->read || ops->mmap; has_write = ops->write || ops->mmap; has_mmap = ops->mmap; /* see the flag definition for details */ if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { if ((file->f_mode & FMODE_WRITE) && (!(inode->i_mode & S_IWUGO) || !has_write)) goto err_out; if ((file->f_mode & FMODE_READ) && (!(inode->i_mode & S_IRUGO) || !has_read)) goto err_out; } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); if (!of) goto err_out; /* * The following is done to give a different lockdep key to * @of->mutex for files which implement mmap. This is a rather * crude way to avoid false positive lockdep warning around * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under * which mm->mmap_lock nests, while holding @of->mutex. As each * open file has a separate mutex, it's okay as long as those don't * happen on the same file. 
At this point, we can't easily give * each file a separate locking class. Let's differentiate on * whether the file has mmap or not for now. * * For similar reasons, writable and readonly files are given different * lockdep key, because the writable file /sys/power/resume may call vfs * lookup helpers for arbitrary paths and readonly files can be read by * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs. * * All three cases look the same. They're supposed to * look that way and give @of->mutex different static lockdep keys. */ if (has_mmap) mutex_init(&of->mutex); else if (file->f_mode & FMODE_WRITE) mutex_init(&of->mutex); else mutex_init(&of->mutex); of->kn = kn; of->file = file; /* * Write path needs to atomic_write_len outside active reference. * Cache it in open_file. See kernfs_fop_write_iter() for details. */ of->atomic_write_len = ops->atomic_write_len; error = -EINVAL; /* * ->seq_show is incompatible with ->prealloc, * as seq_read does its own allocation. * ->read must be used instead. */ if (ops->prealloc && ops->seq_show) goto err_free; if (ops->prealloc) { int len = of->atomic_write_len ?: PAGE_SIZE; of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); error = -ENOMEM; if (!of->prealloc_buf) goto err_free; mutex_init(&of->prealloc_mutex); } /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. 
*/ if (ops->seq_show) error = seq_open(file, &kernfs_seq_ops); else error = seq_open(file, NULL); if (error) goto err_free; of->seq_file = file->private_data; of->seq_file->private = of; /* seq_file clears PWRITE unconditionally, restore it if WRITE */ if (file->f_mode & FMODE_WRITE) file->f_mode |= FMODE_PWRITE; /* make sure we have open node struct */ error = kernfs_get_open_node(kn, of); if (error) goto err_seq_release; if (ops->open) { /* nobody has access to @of yet, skip @of->mutex */ error = ops->open(of); if (error) goto err_put_node; } /* open succeeded, put active references */ kernfs_put_active(kn); return 0; err_put_node: kernfs_unlink_open_file(kn, of, true); err_seq_release: seq_release(inode, file); err_free: kfree(of->prealloc_buf); kfree(of); err_out: kernfs_put_active(kn); return error; } /* used from release/drain to ensure that ->release() is called exactly once */ static void kernfs_release_file(struct kernfs_node *kn, struct kernfs_open_file *of) { /* * @of is guaranteed to have no other file operations in flight and * we just want to synchronize release and drain paths. * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used * here because drain path may be called from places which can * cause circular dependency. */ lockdep_assert_held(kernfs_open_file_mutex_ptr(kn)); if (!of->released) { /* * A file is never detached without being released and we * need to be able to release files which are deactivated * and being drained. Don't use kernfs_ops(). 
*/ kn->attr.ops->release(of); of->released = true; of_on(of)->nr_to_release--; } } static int kernfs_fop_release(struct inode *inode, struct file *filp) { struct kernfs_node *kn = inode->i_private; struct kernfs_open_file *of = kernfs_of(filp); if (kn->flags & KERNFS_HAS_RELEASE) { struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); kernfs_release_file(kn, of); mutex_unlock(mutex); } kernfs_unlink_open_file(kn, of, false); seq_release(inode, filp); kfree(of->prealloc_buf); kfree(of); return 0; } bool kernfs_should_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; bool ret; /* * @kn being deactivated guarantees that @kn->attr.open can't change * beneath us making the lockless test below safe. * Callers post kernfs_unbreak_active_protection may be counted in * kn->active by now, do not WARN_ON because of them. */ rcu_read_lock(); on = rcu_dereference(kn->attr.open); ret = on && (on->nr_mmapped || on->nr_to_release); rcu_read_unlock(); return ret; } void kernfs_drain_open_files(struct kernfs_node *kn) { struct kernfs_open_node *on; struct kernfs_open_file *of; struct mutex *mutex; mutex = kernfs_open_file_mutex_lock(kn); on = kernfs_deref_open_node_locked(kn); if (!on) { mutex_unlock(mutex); return; } list_for_each_entry(of, &on->files, list) { struct inode *inode = file_inode(of->file); if (of->mmapped) { unmap_mapping_range(inode->i_mapping, 0, 0, 1); of->mmapped = false; on->nr_mmapped--; } if (kn->flags & KERNFS_HAS_RELEASE) kernfs_release_file(kn, of); } WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release); mutex_unlock(mutex); } /* * Kernfs attribute files are pollable. The idea is that you read * the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the * manager for the kobject supports notification), poll will * return EPOLLERR|EPOLLPRI, and select will return the fd whether * it is waiting for read, write, or exceptions. 
* Once poll/select indicates that the value has changed, you * need to close and re-open the file, or seek to 0 and read again. * Reminder: this only works for attributes which actively support * it, and it is not possible to test an attribute from userspace * to see if it supports poll (Neither 'poll' nor 'select' return * an appropriate error code). When in doubt, set a suitable timeout value. */ __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) { struct kernfs_open_node *on = of_on(of); poll_wait(of->file, &on->poll, wait); if (of->event != atomic_read(&on->event)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; return DEFAULT_POLLMASK; } static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) { struct kernfs_open_file *of = kernfs_of(filp); struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); __poll_t ret; if (!kernfs_get_active(kn)) return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; if (kn->attr.ops->poll) ret = kn->attr.ops->poll(of, wait); else ret = kernfs_generic_poll(of, wait); kernfs_put_active(kn); return ret; } static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence) { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; loff_t ret; /* * @of->mutex nests outside active ref and is primarily to ensure that * the ops aren't called concurrently for the same open file. 
*/ mutex_lock(&of->mutex); if (!kernfs_get_active(of->kn)) { mutex_unlock(&of->mutex); return -ENODEV; } ops = kernfs_ops(of->kn); if (ops->llseek) ret = ops->llseek(of, offset, whence); else ret = generic_file_llseek(file, offset, whence); kernfs_put_active(of->kn); mutex_unlock(&of->mutex); return ret; } static void kernfs_notify_workfn(struct work_struct *work) { struct kernfs_node *kn; struct kernfs_super_info *info; struct kernfs_root *root; repeat: /* pop one off the notify_list */ spin_lock_irq(&kernfs_notify_lock); kn = kernfs_notify_list; if (kn == KERNFS_NOTIFY_EOL) { spin_unlock_irq(&kernfs_notify_lock); return; } kernfs_notify_list = kn->attr.notify_next; kn->attr.notify_next = NULL; spin_unlock_irq(&kernfs_notify_lock); root = kernfs_root(kn); /* kick fsnotify */ down_read(&root->kernfs_supers_rwsem); down_read(&root->kernfs_rwsem); list_for_each_entry(info, &kernfs_root(kn)->supers, node) { struct kernfs_node *parent; struct inode *p_inode = NULL; const char *kn_name; struct inode *inode; struct qstr name; /* * We want fsnotify_modify() on @kn but as the * modifications aren't originating from userland don't * have the matching @file available. Look up the inodes * and generate the events manually. */ inode = ilookup(info->sb, kernfs_ino(kn)); if (!inode) continue; kn_name = kernfs_rcu_name(kn); name = QSTR(kn_name); parent = kernfs_get_parent(kn); if (parent) { p_inode = ilookup(info->sb, kernfs_ino(parent)); if (p_inode) { fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD, inode, FSNOTIFY_EVENT_INODE, p_inode, &name, inode, 0); iput(p_inode); } kernfs_put(parent); } if (!p_inode) fsnotify_inode(inode, FS_MODIFY); iput(inode); } up_read(&root->kernfs_rwsem); up_read(&root->kernfs_supers_rwsem); kernfs_put(kn); goto repeat; } /** * kernfs_notify - notify a kernfs file * @kn: file to notify * * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any * context. 
*/ void kernfs_notify(struct kernfs_node *kn) { static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); unsigned long flags; struct kernfs_open_node *on; if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) return; /* kick poll immediately */ rcu_read_lock(); on = rcu_dereference(kn->attr.open); if (on) { atomic_inc(&on->event); wake_up_interruptible(&on->poll); } rcu_read_unlock(); /* schedule work to kick fsnotify */ spin_lock_irqsave(&kernfs_notify_lock, flags); if (!kn->attr.notify_next) { kernfs_get(kn); kn->attr.notify_next = kernfs_notify_list; kernfs_notify_list = kn; schedule_work(&kernfs_notify_work); } spin_unlock_irqrestore(&kernfs_notify_lock, flags); } EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { .read_iter = kernfs_fop_read_iter, .write_iter = kernfs_fop_write_iter, .llseek = kernfs_fop_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /** * __kernfs_create_file - kernfs internal function to create a file * @parent: directory to create the file in * @name: name of the file * @mode: mode of the file * @uid: uid of the file * @gid: gid of the file * @size: size of the file * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Return: the created node on success, ERR_PTR() value on error. 
*/ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { struct kernfs_node *kn; unsigned flags; int rc; flags = KERNFS_FILE; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, uid, gid, flags); if (!kn) return ERR_PTR(-ENOMEM); kn->attr.ops = ops; kn->attr.size = size; kn->ns = ns; kn->priv = priv; #ifdef CONFIG_DEBUG_LOCK_ALLOC if (key) { lockdep_init_map(&kn->dep_map, "kn->active", key, 0); kn->flags |= KERNFS_LOCKDEP; } #endif /* * kn->attr.ops is accessible only while holding active ref. We * need to know whether some ops are implemented outside active * ref. Cache their existence in flags. */ if (ops->seq_show) kn->flags |= KERNFS_HAS_SEQ_SHOW; if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; if (ops->release) kn->flags |= KERNFS_HAS_RELEASE; rc = kernfs_add_one(kn); if (rc) { kernfs_put(kn); return ERR_PTR(rc); } return kn; }
25 67 128 4 23 9 2 23 23 7 11 9 23 23 4 4 1 2 1 1 1 6 3 3 4 1 1 3 2 1 1 3 1 2 2 141 9 132 66 66 60 4 2 18 15 15 33 30 25 25 25 25 27 1 27 1 27 33 33 30 33 33 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/msdos.c * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug * in the early extended-partition checks and added DM partitions * * Support for DiskManager v6.0x added by Mark Lord, * with information provided by OnTrack. This now works for linux fdisk * and LILO, as well as loadlin and bootln. Note that disks other than * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1). * * More flexible handling of extended partitions - aeb, 950831 * * Check partition table on IDE disks for common CHS translations * * Re-organised Feb 1998 Russell King * * BSD disklabel support by Yossi Gottlieb <yogo@math.tau.ac.il> * updated by Marc Espie <Marc.Espie@openbsd.org> * * Unixware slices support by Andrzej Krzysztofowicz <ankry@mif.pg.gda.pl> * and Krzysztof G. 
Baranowski <kgb@knm.org.pl> */ #include <linux/msdos_fs.h> #include <linux/msdos_partition.h> #include "check.h" #include "efi.h" /* * Many architectures don't like unaligned accesses, while * the nr_sects and start_sect partition table entries are * at a 2 (mod 4) address. */ #include <linux/unaligned.h> static inline sector_t nr_sects(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->nr_sects); } static inline sector_t start_sect(struct msdos_partition *p) { return (sector_t)get_unaligned_le32(&p->start_sect); } static inline int is_extended_partition(struct msdos_partition *p) { return (p->sys_ind == DOS_EXTENDED_PARTITION || p->sys_ind == WIN98_EXTENDED_PARTITION || p->sys_ind == LINUX_EXTENDED_PARTITION); } #define MSDOS_LABEL_MAGIC1 0x55 #define MSDOS_LABEL_MAGIC2 0xAA static inline int msdos_magic_present(unsigned char *p) { return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); } /* Value is EBCDIC 'IBMA' */ #define AIX_LABEL_MAGIC1 0xC9 #define AIX_LABEL_MAGIC2 0xC2 #define AIX_LABEL_MAGIC3 0xD4 #define AIX_LABEL_MAGIC4 0xC1 static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) { struct msdos_partition *pt = (struct msdos_partition *) (p + 0x1be); Sector sect; unsigned char *d; int slot, ret = 0; if (!(p[0] == AIX_LABEL_MAGIC1 && p[1] == AIX_LABEL_MAGIC2 && p[2] == AIX_LABEL_MAGIC3 && p[3] == AIX_LABEL_MAGIC4)) return 0; /* * Assume the partition table is valid if Linux partitions exists. * Note that old Solaris/x86 partitions use the same indicator as * Linux swap partitions, so we consider that a Linux partition as * well. 
*/ for (slot = 1; slot <= 4; slot++, pt++) { if (pt->sys_ind == SOLARIS_X86_PARTITION || pt->sys_ind == LINUX_RAID_PARTITION || pt->sys_ind == LINUX_DATA_PARTITION || pt->sys_ind == LINUX_LVM_PARTITION || is_extended_partition(pt)) return 0; } d = read_part_sector(state, 7, &sect); if (d) { if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') ret = 1; put_dev_sector(sect); } return ret; } static void set_info(struct parsed_partitions *state, int slot, u32 disksig) { struct partition_meta_info *info = &state->parts[slot].info; snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig, slot); info->volname[0] = 0; state->parts[slot].has_info = true; } /* * Create devices for each logical partition in an extended partition. * The logical partitions form a linked list, with each entry being * a partition table with two entries. The first entry * is the real data partition (with a start relative to the partition * table start). The second is a pointer to the next logical partition * (with a start relative to the entire extended partition). * We do not create a Linux partition for the partition tables, but * only for the actual data partitions. 
*/ static void parse_extended(struct parsed_partitions *state, sector_t first_sector, sector_t first_size, u32 disksig) { struct msdos_partition *p; Sector sect; unsigned char *data; sector_t this_sector, this_size; sector_t sector_size; int loopct = 0; /* number of links followed without finding a data partition */ int i; sector_size = queue_logical_block_size(state->disk->queue) / 512; this_sector = first_sector; this_size = first_size; while (1) { if (++loopct > 100) return; if (state->next == state->limit) return; data = read_part_sector(state, this_sector, &sect); if (!data) return; if (!msdos_magic_present(data + 510)) goto done; p = (struct msdos_partition *) (data + 0x1be); /* * Usually, the first entry is the real data partition, * the 2nd entry is the next extended partition, or empty, * and the 3rd and 4th entries are unused. * However, DRDOS sometimes has the extended partition as * the first entry (when the data partition is empty), * and OS/2 seems to use all four entries. */ /* * First process the data partition(s) */ for (i = 0; i < 4; i++, p++) { sector_t offs, size, next; if (!nr_sects(p) || is_extended_partition(p)) continue; /* Check the 3rd and 4th entries - these sometimes contain random garbage */ offs = start_sect(p)*sector_size; size = nr_sects(p)*sector_size; next = this_sector + offs; if (i >= 2) { if (offs + size > this_size) continue; if (next < first_sector) continue; if (next + size > first_sector + first_size) continue; } put_partition(state, state->next, next, size); set_info(state, state->next, disksig); if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[state->next].flags = ADDPART_FLAG_RAID; loopct = 0; if (++state->next == state->limit) goto done; } /* * Next, process the (first) extended partition, if present. * (So far, there seems to be no reason to make * parse_extended() recursive and allow a tree * of extended partitions.) * It should be a link to the next logical partition. 
*/ p -= 4; for (i = 0; i < 4; i++, p++) if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) goto done; /* nothing left to do */ this_sector = first_sector + start_sect(p) * sector_size; this_size = nr_sects(p) * sector_size; put_dev_sector(sect); } done: put_dev_sector(sect); } #define SOLARIS_X86_NUMSLICE 16 #define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) struct solaris_x86_slice { __le16 s_tag; /* ID tag of partition */ __le16 s_flag; /* permission flags */ __le32 s_start; /* start sector no of partition */ __le32 s_size; /* # of blocks in partition */ }; struct solaris_x86_vtoc { unsigned int v_bootinfo[3]; /* info needed by mboot */ __le32 v_sanity; /* to verify vtoc sanity */ __le32 v_version; /* layout version */ char v_volume[8]; /* volume name */ __le16 v_sectorsz; /* sector size in bytes */ __le16 v_nparts; /* number of partitions */ unsigned int v_reserved[10]; /* free space */ struct solaris_x86_slice v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ unsigned int timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */ char v_asciilabel[128]; /* for compatibility */ }; /* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also indicates linux swap. Be careful before believing this is Solaris. 
*/ static void parse_solaris_x86(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_SOLARIS_X86_PARTITION Sector sect; struct solaris_x86_vtoc *v; int i; short max_nparts; v = read_part_sector(state, offset + 1, &sect); if (!v) return; if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { put_dev_sector(sect); return; } { char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); } if (le32_to_cpu(v->v_version) != 1) { char tmp[64]; snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", le32_to_cpu(v->v_version)); strlcat(state->pp_buf, tmp, PAGE_SIZE); put_dev_sector(sect); return; } /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; for (i = 0; i < max_nparts && state->next < state->limit; i++) { struct solaris_x86_slice *s = &v->v_slice[i]; char tmp[3 + 10 + 1 + 1]; if (s->s_size == 0) continue; snprintf(tmp, sizeof(tmp), " [s%d]", i); strlcat(state->pp_buf, tmp, PAGE_SIZE); /* solaris partitions are relative to current MS-DOS * one; must add the offset of the current partition */ put_partition(state, state->next++, le32_to_cpu(s->s_start)+offset, le32_to_cpu(s->s_size)); } put_dev_sector(sect); strlcat(state->pp_buf, " >\n", PAGE_SIZE); #endif } /* check against BSD src/sys/sys/disklabel.h for consistency */ #define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ #define BSD_MAXPARTITIONS 16 #define OPENBSD_MAXPARTITIONS 16 #define BSD_FS_UNUSED 0 /* disklabel unused partition entry ID */ struct bsd_disklabel { __le32 d_magic; /* the magic number */ __s16 d_type; /* drive type */ __s16 d_subtype; /* controller/d_type specific */ char d_typename[16]; /* type name, e.g. 
"eagle" */ char d_packname[16]; /* pack identifier */ __u32 d_secsize; /* # of bytes per sector */ __u32 d_nsectors; /* # of data sectors per track */ __u32 d_ntracks; /* # of tracks per cylinder */ __u32 d_ncylinders; /* # of data cylinders per unit */ __u32 d_secpercyl; /* # of data sectors per cylinder */ __u32 d_secperunit; /* # of data sectors per unit */ __u16 d_sparespertrack; /* # of spare sectors per track */ __u16 d_sparespercyl; /* # of spare sectors per cylinder */ __u32 d_acylinders; /* # of alt. cylinders per unit */ __u16 d_rpm; /* rotational speed */ __u16 d_interleave; /* hardware sector interleave */ __u16 d_trackskew; /* sector 0 skew, per track */ __u16 d_cylskew; /* sector 0 skew, per cylinder */ __u32 d_headswitch; /* head switch time, usec */ __u32 d_trkseek; /* track-to-track seek, usec */ __u32 d_flags; /* generic flags */ #define NDDATA 5 __u32 d_drivedata[NDDATA]; /* drive-type specific information */ #define NSPARE 5 __u32 d_spare[NSPARE]; /* reserved for future use */ __le32 d_magic2; /* the magic number (again) */ __le16 d_checksum; /* xor of data incl. partitions */ /* filesystem and partition information: */ __le16 d_npartitions; /* number of partitions in following */ __le32 d_bbsize; /* size of boot area at sn0, bytes */ __le32 d_sbsize; /* max size of fs superblock, bytes */ struct bsd_partition { /* the partition table */ __le32 p_size; /* number of sectors in partition */ __le32 p_offset; /* starting sector */ __le32 p_fsize; /* filesystem basic fragment size */ __u8 p_fstype; /* filesystem type, see below */ __u8 p_frag; /* filesystem fragments per block */ __le16 p_cpg; /* filesystem cylinders per group */ } d_partitions[BSD_MAXPARTITIONS]; /* actually may be more */ }; #if defined(CONFIG_BSD_DISKLABEL) /* * Create devices for BSD partitions listed in a disklabel, under a * dos-like partition. See parse_extended() for more information. 
*/ static void parse_bsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin, char *flavour, int max_partitions) { Sector sect; struct bsd_disklabel *l; struct bsd_partition *p; char tmp[64]; l = read_part_sector(state, offset + 1, &sect); if (!l) return; if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { put_dev_sector(sect); return; } snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); strlcat(state->pp_buf, tmp, PAGE_SIZE); if (le16_to_cpu(l->d_npartitions) < max_partitions) max_partitions = le16_to_cpu(l->d_npartitions); for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { sector_t bsd_start, bsd_size; if (state->next == state->limit) break; if (p->p_fstype == BSD_FS_UNUSED) continue; bsd_start = le32_to_cpu(p->p_offset); bsd_size = le32_to_cpu(p->p_size); /* FreeBSD has relative offset if C partition offset is zero */ if (memcmp(flavour, "bsd\0", 4) == 0 && le32_to_cpu(l->d_partitions[2].p_offset) == 0) bsd_start += offset; if (offset == bsd_start && size == bsd_size) /* full parent partition, we have it already */ continue; if (offset > bsd_start || offset+size < bsd_start+bsd_size) { strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); continue; } put_partition(state, state->next++, bsd_start, bsd_size); } put_dev_sector(sect); if (le16_to_cpu(l->d_npartitions) > max_partitions) { snprintf(tmp, sizeof(tmp), " (ignored %d more)", le16_to_cpu(l->d_npartitions) - max_partitions); strlcat(state->pp_buf, tmp, PAGE_SIZE); } strlcat(state->pp_buf, " >\n", PAGE_SIZE); } #endif static void parse_freebsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); #endif } static void parse_netbsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); #endif } static 
void parse_openbsd(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_BSD_DISKLABEL parse_bsd(state, offset, size, origin, "openbsd", OPENBSD_MAXPARTITIONS); #endif } #define UNIXWARE_DISKMAGIC (0xCA5E600DUL) /* The disk magic number */ #define UNIXWARE_DISKMAGIC2 (0x600DDEEEUL) /* The slice table magic nr */ #define UNIXWARE_NUMSLICE 16 #define UNIXWARE_FS_UNUSED 0 /* Unused slice entry ID */ struct unixware_slice { __le16 s_label; /* label */ __le16 s_flags; /* permission flags */ __le32 start_sect; /* starting sector */ __le32 nr_sects; /* number of sectors in slice */ }; struct unixware_disklabel { __le32 d_type; /* drive type */ __le32 d_magic; /* the magic number */ __le32 d_version; /* version number */ char d_serial[12]; /* serial number of the device */ __le32 d_ncylinders; /* # of data cylinders per device */ __le32 d_ntracks; /* # of tracks per cylinder */ __le32 d_nsectors; /* # of data sectors per track */ __le32 d_secsize; /* # of bytes per sector */ __le32 d_part_start; /* # of first sector of this partition*/ __le32 d_unknown1[12]; /* ? */ __le32 d_alt_tbl; /* byte offset of alternate table */ __le32 d_alt_len; /* byte length of alternate table */ __le32 d_phys_cyl; /* # of physical cylinders per device */ __le32 d_phys_trk; /* # of physical tracks per cylinder */ __le32 d_phys_sec; /* # of physical sectors per track */ __le32 d_phys_bytes; /* # of physical bytes per sector */ __le32 d_unknown2; /* ? */ __le32 d_unknown3; /* ? */ __le32 d_pad[8]; /* pad */ struct unixware_vtoc { __le32 v_magic; /* the magic number */ __le32 v_version; /* version number */ char v_name[8]; /* volume name */ __le16 v_nslices; /* # of slices */ __le16 v_unknown1; /* ? */ __le32 v_reserved[10]; /* reserved */ struct unixware_slice v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ } vtoc; }; /* 408 */ /* * Create devices for Unixware partitions listed in a disklabel, under a * dos-like partition. 
See parse_extended() for more information. */ static void parse_unixware(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_UNIXWARE_DISKLABEL Sector sect; struct unixware_disklabel *l; struct unixware_slice *p; l = read_part_sector(state, offset + 29, &sect); if (!l) return; if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) { put_dev_sector(sect); return; } { char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); } p = &l->vtoc.v_slice[1]; /* I omit the 0th slice as it is the same as whole disk. */ while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { if (state->next == state->limit) break; if (p->s_label != UNIXWARE_FS_UNUSED) put_partition(state, state->next++, le32_to_cpu(p->start_sect), le32_to_cpu(p->nr_sects)); p++; } put_dev_sector(sect); strlcat(state->pp_buf, " >\n", PAGE_SIZE); #endif } #define MINIX_NR_SUBPARTITIONS 4 /* * Minix 2.0.0/2.0.2 subpartition support. * Anand Krishnamurthy <anandk@wiproge.med.ge.com> * Rajeev V. Pillai <rajeevvp@yahoo.com> */ static void parse_minix(struct parsed_partitions *state, sector_t offset, sector_t size, int origin) { #ifdef CONFIG_MINIX_SUBPARTITION Sector sect; unsigned char *data; struct msdos_partition *p; int i; data = read_part_sector(state, offset, &sect); if (!data) return; p = (struct msdos_partition *)(data + 0x1be); /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or * the normal boot sector. 
*/ if (msdos_magic_present(data + 510) && p->sys_ind == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); strlcat(state->pp_buf, tmp, PAGE_SIZE); for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { if (state->next == state->limit) break; /* add each partition in use */ if (p->sys_ind == MINIX_PARTITION) put_partition(state, state->next++, start_sect(p), nr_sects(p)); } strlcat(state->pp_buf, " >\n", PAGE_SIZE); } put_dev_sector(sect); #endif /* CONFIG_MINIX_SUBPARTITION */ } static struct { unsigned char id; void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); } subtypes[] = { {FREEBSD_PARTITION, parse_freebsd}, {NETBSD_PARTITION, parse_netbsd}, {OPENBSD_PARTITION, parse_openbsd}, {MINIX_PARTITION, parse_minix}, {UNIXWARE_PARTITION, parse_unixware}, {SOLARIS_X86_PARTITION, parse_solaris_x86}, {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86}, {0, NULL}, }; int msdos_partition(struct parsed_partitions *state) { sector_t sector_size; Sector sect; unsigned char *data; struct msdos_partition *p; struct fat_boot_sector *fb; int slot; u32 disksig; sector_size = queue_logical_block_size(state->disk->queue) / 512; data = read_part_sector(state, 0, &sect); if (!data) return -1; /* * Note order! (some AIX disks, e.g. unbootable kind, * have no MSDOS 55aa) */ if (aix_magic_present(state, data)) { put_dev_sector(sect); #ifdef CONFIG_AIX_PARTITION return aix_partition(state); #else strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); return 0; #endif } if (!msdos_magic_present(data + 510)) { put_dev_sector(sect); return 0; } /* * Now that the 55aa signature is present, this is probably * either the boot sector of a FAT filesystem or a DOS-type * partition table. Reject this in case the boot indicator * is not 0 or 0x80. 
*/ p = (struct msdos_partition *) (data + 0x1be); for (slot = 1; slot <= 4; slot++, p++) { if (p->boot_ind != 0 && p->boot_ind != 0x80) { /* * Even without a valid boot indicator value * its still possible this is valid FAT filesystem * without a partition table. */ fb = (struct fat_boot_sector *) data; if (slot == 1 && fb->reserved && fb->fats && fat_valid_media(fb->media)) { strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; } else { put_dev_sector(sect); return 0; } } } #ifdef CONFIG_EFI_PARTITION p = (struct msdos_partition *) (data + 0x1be); for (slot = 1 ; slot <= 4 ; slot++, p++) { /* If this is an EFI GPT disk, msdos should ignore it. */ if (p->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT) { put_dev_sector(sect); return 0; } } #endif p = (struct msdos_partition *) (data + 0x1be); disksig = le32_to_cpup((__le32 *)(data + 0x1b8)); /* * Look for partitions in two passes: * First find the primary and DOS-type extended partitions. * On the second pass look inside *BSD, Unixware and Solaris partitions. */ state->next = 5; for (slot = 1 ; slot <= 4 ; slot++, p++) { sector_t start = start_sect(p)*sector_size; sector_t size = nr_sects(p)*sector_size; if (!size) continue; if (is_extended_partition(p)) { /* * prevent someone doing mkfs or mkswap on an * extended partition, but leave room for LILO * FIXME: this uses one logical sector for > 512b * sector, although it may not be enough/proper. 
*/ sector_t n = 2; n = min(size, max(sector_size, n)); put_partition(state, slot, start, n); strlcat(state->pp_buf, " <", PAGE_SIZE); parse_extended(state, start, size, disksig); strlcat(state->pp_buf, " >", PAGE_SIZE); continue; } put_partition(state, slot, start, size); set_info(state, slot, disksig); if (p->sys_ind == LINUX_RAID_PARTITION) state->parts[slot].flags = ADDPART_FLAG_RAID; if (p->sys_ind == DM6_PARTITION) strlcat(state->pp_buf, "[DM]", PAGE_SIZE); if (p->sys_ind == EZD_PARTITION) strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); } strlcat(state->pp_buf, "\n", PAGE_SIZE); /* second pass - output for each on a separate line */ p = (struct msdos_partition *) (0x1be + data); for (slot = 1 ; slot <= 4 ; slot++, p++) { unsigned char id = p->sys_ind; int n; if (!nr_sects(p)) continue; for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) ; if (!subtypes[n].parse) continue; subtypes[n].parse(state, start_sect(p) * sector_size, nr_sects(p) * sector_size, slot); } put_dev_sector(sect); return 1; }
10 1 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 // SPDX-License-Identifier: GPL-2.0-only /* * netfilter module to enforce network quotas * * Sam Johnston <samj@samj.net> */ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_quota.h> #include <linux/module.h> struct xt_quota_priv { spinlock_t lock; uint64_t quota; }; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); MODULE_DESCRIPTION("Xtables: countdown quota match"); MODULE_ALIAS("ipt_quota"); MODULE_ALIAS("ip6t_quota"); static bool quota_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct xt_quota_info *q = (void *)par->matchinfo; struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh(&priv->lock); if (priv->quota >= skb->len) { priv->quota -= skb->len; ret = !ret; } else { /* we do not allow even small packets from now on */ priv->quota = 0; } spin_unlock_bh(&priv->lock); return ret; } static int quota_mt_check(const struct xt_mtchk_param *par) { struct xt_quota_info *q = par->matchinfo; if (q->flags & ~XT_QUOTA_MASK) return -EINVAL; q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); if (q->master == NULL) return -ENOMEM; spin_lock_init(&q->master->lock); q->master->quota = q->quota; return 0; } static void quota_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_quota_info *q = par->matchinfo; kfree(q->master); } static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), .usersize = offsetof(struct xt_quota_info, master), .me = 
THIS_MODULE, }; static int __init quota_mt_init(void) { return xt_register_match(&quota_mt_reg); } static void __exit quota_mt_exit(void) { xt_unregister_match(&quota_mt_reg); } module_init(quota_mt_init); module_exit(quota_mt_exit);
15 15 2 1 9 3 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SR-IPv6 implementation
 *
 * Author:
 * David Lebrun <david.lebrun@uclouvain.be>
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <linux/netfilter.h>

/* Headroom advertised in the lwtunnel state for @tuninfo.
 *
 * The result is the SRH length in bytes, (hdrlen + 1) * 8, plus one IPv6
 * header for the ENCAP and ENCAP_RED modes. Inline mode adds only the SRH.
 * Both L2 encap modes report 0.
 */
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
	int head = 0;

	switch (tuninfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		break;
	case SEG6_IPTUN_MODE_ENCAP:
	case SEG6_IPTUN_MODE_ENCAP_RED:
		head = sizeof(struct ipv6hdr);
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
	case SEG6_IPTUN_MODE_L2ENCAP_RED:
		return 0;
	}

	return ((tuninfo->srh->hdrlen + 1) << 3) + head;
}

/* Private data kept in lwtunnel_state->data: a dst cache followed by the
 * variable-length encap description copied from the netlink request.
 */
struct seg6_lwt {
	struct dst_cache cache;
	struct seg6_iptunnel_encap tuninfo[];
};

/* View the opaque lwtunnel data area as a struct seg6_lwt. */
static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct seg6_lwt *)lwt->data;
}

/* Return the encap description embedded in the lwtunnel state. */
static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
{
	return seg6_lwt_lwtunnel(lwt)->tuninfo;
}

/* The only attribute parsed by this file is the binary SRH blob. */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
};

/* Append @tuninfo to @skb as a netlink attribute of type @attrtype.
 *
 * SEG6_IPTUN_ENCAP_SIZE(tuninfo) bytes are reserved and copied.
 * Returns 0 on success or -EMSGSIZE when nla_reserve() fails.
 */
static int nla_put_srh(struct sk_buff *skb, int attrtype,
		       struct seg6_iptunnel_encap *tuninfo)
{
	struct seg6_iptunnel_encap *data;
	struct nlattr *nla;
	int len;

	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);

	nla = nla_reserve(skb, attrtype, len);
	if (!nla)
		return -EMSGSIZE;

	data = nla_data(nla);
	memcpy(data, tuninfo, len);

	return 0;
}

/* Choose the source address of the outer IPv6 header and store it in @saddr.
 *
 * The per-netns tun_src is used when it is not the unspecified address;
 * otherwise ipv6_dev_get_saddr() picks one for @dev and @daddr. tun_src is
 * read under rcu_read_lock().
 */
static void set_tun_src(struct net *net, struct net_device *dev,
			struct in6_addr *daddr, struct in6_addr *saddr)
{
	struct seg6_pernet_data *sdata = seg6_pernet(net);
	struct in6_addr *tun_src;

	rcu_read_lock();

	tun_src = rcu_dereference(sdata->tun_src);

	if (!ipv6_addr_any(tun_src)) {
		memcpy(saddr, tun_src, sizeof(struct in6_addr));
	} else {
		ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
				   saddr);
	}

	rcu_read_unlock();
}

/* Compute flowlabel for outer IPv6 header
 *
 * Driven by the seg6_flowlabel sysctl:
 *   > 0  - derive the label from the skb hash (rotated by 16 bits);
 *   == 0 - copy the inner flow label, only when the inner packet is IPv6;
 *   otherwise the label is 0.
 */
static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
				  struct ipv6hdr *inner_hdr)
{
	int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
	__be32 flowlabel = 0;
	u32 hash;

	if (do_flowlabel > 0) {
		hash = skb_get_hash(skb);
		hash = rol32(hash, 16);
		flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
	} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
		flowlabel = ip6_flowlabel(inner_hdr);
	}
	return flowlabel;
}

/* Push an outer IPv6 header followed by a full copy of @osrh in front of
 * the packet carried by @skb.
 *
 * @proto is written to the next-header field of the copied SRH.
 * @cache_dst is only used to size the extra headroom requested from
 * skb_cow_head() (through dst_dev_overhead()); it may be NULL.
 *
 * Returns 0 on success, or the error from skb_cow_head() or
 * seg6_push_hmac(). The skb is not freed here on error.
 */
static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
			       int proto, struct dst_entry *cache_dst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = dev_net(dst->dev);
	struct ipv6hdr *hdr, *inner_hdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, tot_len, err;
	__be32 flowlabel;

	/* SRH length in bytes, then SRH + outer IPv6 header */
	hdrlen = (osrh->hdrlen + 1) << 3;
	tot_len = hdrlen + sizeof(*hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* must be read before skb_push() moves the network header */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* inherit tc, flowlabel and hlim
	 * hlim will be decremented in ip6_forward() afterwards and
	 * decapsulation will overwrite inner hlim with outer hlim
	 */

	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		/* the control block has been erased, so we have to set the
		 * iif once again.
		 * We read the receiving interface index directly from the
		 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
		 * ip_rcv_core(...)).
		 */
		IP6CB(skb)->iif = skb->skb_iif;
	}

	hdr->nexthdr = NEXTHDR_ROUTING;

	/* the SRH sits immediately after the outer IPv6 header */
	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	isrh->nexthdr = proto;

	/* outer destination is the segment indexed by first_segment */
	hdr->daddr = isrh->segments[isrh->first_segment];
	set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}

/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
	return __seg6_do_srh_encap(skb, osrh, proto, NULL);
}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);

/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH
 *
 * The segment at index first_segment always goes into the outer destination
 * address. With more than one segment it is left out of the pushed SRH, so
 * the SRH shrinks by one in6_addr. With a single segment and no HMAC the SRH
 * is not pushed at all.
 *
 * Returns 0 on success, or the error from skb_cow_head() or
 * seg6_push_hmac(). The skb is not freed here on error.
 */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
				 struct ipv6_sr_hdr *osrh, int proto,
				 struct dst_entry *cache_dst)
{
	__u8 first_seg = osrh->first_segment;
	struct dst_entry *dst = skb_dst(skb);
	struct net *net = dev_net(dst->dev);
	struct ipv6hdr *hdr, *inner_hdr;
	int hdrlen = ipv6_optlen(osrh);
	int red_tlv_offset, tlv_offset;
	struct ipv6_sr_hdr *isrh;
	bool skip_srh = false;
	__be32 flowlabel;
	int tot_len, err;
	int red_hdrlen;
	int tlvs_len;

	if (first_seg > 0) {
		red_hdrlen = hdrlen - sizeof(struct in6_addr);
	} else {
		/* NOTE: if tag/flags and/or other TLVs are introduced in the
		 * seg6_iptunnel infrastructure, they should be considered when
		 * deciding to skip the SRH.
		 */
		skip_srh = !sr_has_hmac(osrh);

		red_hdrlen = skip_srh ? 0 : hdrlen;
	}

	tot_len = red_hdrlen + sizeof(struct ipv6hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* must be read before skb_push() moves the network header */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* based on seg6_do_srh_encap() */
	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
		IP6CB(skb)->iif = skb->skb_iif;
	}

	/* no matter if we have to skip the SRH or not, the first segment
	 * always comes in the pushed IPv6 header.
	 */
	hdr->daddr = osrh->segments[first_seg];

	if (skip_srh) {
		hdr->nexthdr = proto;

		set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
		goto out;
	}

	/* we cannot skip the SRH, slow path */

	hdr->nexthdr = NEXTHDR_ROUTING;
	isrh = (void *)hdr + sizeof(struct ipv6hdr);

	if (unlikely(!first_seg)) {
		/* this is a very rare case; we have only one SID but
		 * we cannot skip the SRH since we are carrying some
		 * other info.
		 */
		memcpy(isrh, osrh, hdrlen);
		goto srcaddr;
	}

	/* tlv_offset: end of the full segment list in @osrh.
	 * red_tlv_offset: the same position once segments[first_seg] is
	 * left out of the copy.
	 */
	tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
	red_tlv_offset = tlv_offset - sizeof(struct in6_addr);

	/* fixed SRH part plus segments[0 .. first_seg - 1] */
	memcpy(isrh, osrh, red_tlv_offset);

	tlvs_len = hdrlen - tlv_offset;
	if (unlikely(tlvs_len > 0)) {
		const void *s = (const void *)osrh + tlv_offset;
		void *d = (void *)isrh + red_tlv_offset;

		memcpy(d, s, tlvs_len);
	}

	/* one segment fewer: 16 bytes, i.e. two 8-byte hdrlen units */
	--isrh->first_segment;
	isrh->hdrlen -= 2;

srcaddr:
	isrh->nexthdr = proto;
	set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

out:
	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}

/* Insert a copy of @osrh between the existing IPv6 header and its payload.
 *
 * The original IPv6 header is moved to the new start of the packet and the
 * SRH is written right after it. The original destination is saved in
 * segments[0] and replaced by segments[first_segment].
 * @cache_dst is only used to size the headroom request; it may be NULL.
 *
 * Returns 0 on success, or the error from skb_cow_head() or
 * seg6_push_hmac().
 */
static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
				struct dst_entry *cache_dst)
{
	struct ipv6hdr *hdr, *oldhdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, err;

	hdrlen = (osrh->hdrlen + 1) << 3;

	err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);

	skb_pull(skb, sizeof(struct ipv6hdr));
	skb_postpull_rcsum(skb, skb_network_header(skb),
			   sizeof(struct ipv6hdr));

	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);

	/* memmove, not memcpy: a regular copy is used for the SRH below */
	memmove(hdr, oldhdr, sizeof(*hdr));

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	/* chain the SRH in front of whatever followed the IPv6 header */
	isrh->nexthdr = hdr->nexthdr;
	hdr->nexthdr = NEXTHDR_ROUTING;

	isrh->segments[0] = hdr->daddr;
	hdr->daddr = isrh->segments[isrh->first_segment];

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		struct net *net = dev_net(skb_dst(skb)->dev);

		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);

	return 0;
}

/* Apply the SRH transformation selected by the route's tunnel mode.
 *
 * The mode and SRH come from the lwtunnel state attached to skb_dst(skb).
 * Inline mode accepts only IPv6 packets; the encap modes accept IPv4 and
 * IPv6; the L2 encap modes require a MAC header to be set. After a
 * successful encap, skb->protocol is IPv6. Returns 0 or a negative errno;
 * the skb is not freed here.
 */
static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct seg6_iptunnel_encap *tinfo;
	int proto, err = 0;

	tinfo = seg6_encap_lwtunnel(dst->lwtstate);

	switch (tinfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EINVAL;

		err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
		if (err)
			return err;
		break;
	case SEG6_IPTUN_MODE_ENCAP:
	case SEG6_IPTUN_MODE_ENCAP_RED:
		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
		if (err)
			return err;

		if (skb->protocol == htons(ETH_P_IPV6))
			proto = IPPROTO_IPV6;
		else if (skb->protocol == htons(ETH_P_IP))
			proto = IPPROTO_IPIP;
		else
			return -EINVAL;

		if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh,
						  proto, cache_dst);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh,
						    proto, cache_dst);
		if (err)
			return err;

		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
		skb_set_inner_protocol(skb, skb->protocol);
		skb->protocol = htons(ETH_P_IPV6);
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
	case SEG6_IPTUN_MODE_L2ENCAP_RED:
		if (!skb_mac_header_was_set(skb))
			return -EINVAL;

		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
			return -ENOMEM;

		/* expose the MAC header again so it is carried as payload */
		skb_mac_header_rebuild(skb);
		skb_push(skb, skb->mac_len);

		if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh,
						  IPPROTO_ETHERNET,
						  cache_dst);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh,
						    IPPROTO_ETHERNET,
						    cache_dst);
		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	nf_reset_ct(skb);

	return 0;
}

/* insert an SRH within an IPv6 packet, just after the IPv6 header */
int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
{
	return __seg6_do_srh_inline(skb, osrh, NULL);
}
EXPORT_SYMBOL_GPL(seg6_do_srh_inline);

/* Final step of the input path: hand the skb to dst_input(). @net and @sk
 * are unused; the signature matches what NF_HOOK() expects.
 */
static int seg6_input_finish(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	return dst_input(skb);
}

/* Input path: apply the SRH, then attach a route for the new destination.
 *
 * A cached dst is reused when available; otherwise ip6_route_input() is
 * called and its result is cached unless it is an error route or points
 * back at the same lwtstate. On failure the skb is freed and the error
 * returned.
 */
static int seg6_input_core(struct net *net, struct sock *sk,
			   struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct lwtunnel_state *lwtst;
	struct seg6_lwt *slwt;
	int err;

	/* We cannot dereference "orig_dst" once ip6_route_input() or
	 * skb_dst_drop() is called. However, in order to detect a dst loop, we
	 * need the address of its lwtstate. So, save the address of lwtstate
	 * now and use it later as a comparison.
	 */
	lwtst = orig_dst->lwtstate;

	slwt = seg6_lwt_lwtunnel(lwtst);

	local_bh_disable();
	dst = dst_cache_get(&slwt->cache);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err)) {
		/* the cached dst was never attached to the skb */
		dst_release(dst);
		goto drop;
	}

	if (!dst) {
		ip6_route_input(skb);
		dst = skb_dst(skb);

		/* cache only if we don't create a dst reference loop */
		if (!dst->error && lwtst != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache, dst,
					  &ipv6_hdr(skb)->saddr);
			local_bh_enable();
		}

		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
		if (unlikely(err))
			goto drop;
	} else {
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);
	}

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst(skb)->dev, seg6_input_finish);

	return seg6_input_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return err;
}

/* Run the POST_ROUTING netfilter hook for the packet's own family before
 * seg6_input_core(). Protocols other than IPv4/IPv6 return -EINVAL.
 */
static int seg6_input_nf(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct net *net = dev_net(skb->dev);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
			       skb, NULL, dev, seg6_input_core);
	case htons(ETH_P_IPV6):
		return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
			       skb, NULL, dev, seg6_input_core);
	}

	return -EINVAL;
}

/* lwtunnel .input handler: go through netfilter only when the
 * nf_hooks_lwtunnel_enabled static key is on.
 */
static int seg6_input(struct sk_buff *skb)
{
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return seg6_input_nf(skb);

	return seg6_input_core(dev_net(skb->dev), NULL, skb);
}

/* Output path: apply the SRH, then route the resulting IPv6 packet.
 *
 * A cached dst is reused when available; otherwise a flowi6 is built from
 * the new outer header and ip6_route_output() is called. The result is
 * cached unless it points back at the same lwtstate. On failure both the
 * dst reference and the skb are released.
 */
static int seg6_output_core(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct seg6_lwt *slwt;
	int err;

	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);

	local_bh_disable();
	dst = dst_cache_get(&slwt->cache);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err))
		goto drop;

	if (unlikely(!dst)) {
		struct ipv6hdr *hdr = ipv6_hdr(skb);
		struct flowi6 fl6;

		memset(&fl6, 0, sizeof(fl6));
		fl6.daddr = hdr->daddr;
		fl6.saddr = hdr->saddr;
		fl6.flowlabel = ip6_flowinfo(hdr);
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_proto = hdr->nexthdr;

		dst = ip6_route_output(net, NULL, &fl6);
		if (dst->error) {
			err = dst->error;
			goto drop;
		}

		/* cache only if we don't create a dst reference loop */
		if (orig_dst->lwtstate != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
			local_bh_enable();
		}

		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
		if (unlikely(err))
			goto drop;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
			       NULL, skb_dst(skb)->dev, dst_output);

	return dst_output(net, sk, skb);
drop:
	dst_release(dst);
	kfree_skb(skb);
	return err;
}

/* Run the POST_ROUTING netfilter hook for the packet's own family before
 * seg6_output_core(). Protocols other than IPv4/IPv6 return -EINVAL.
 */
static int seg6_output_nf(struct net *net, struct sock *sk,
			  struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
			       NULL, dev, seg6_output_core);
	case htons(ETH_P_IPV6):
		return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
			       NULL, dev, seg6_output_core);
	}

	return -EINVAL;
}

/* lwtunnel .output handler: go through netfilter only when the
 * nf_hooks_lwtunnel_enabled static key is on.
 */
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return seg6_output_nf(net, sk, skb);

	return seg6_output_core(net, sk, skb);
}

/* lwtunnel .build_state handler: validate the SEG6_IPTUNNEL_SRH attribute
 * nested in @nla and allocate the per-route state.
 *
 * On success *ts points to the new state and 0 is returned. Returns
 * -EINVAL for an unsupported family or mode, a missing or too short
 * attribute, or an SRH rejected by seg6_validate_srh(); -ENOMEM when the
 * state cannot be allocated; or the error from parsing / dst_cache_init().
 */
static int seg6_build_state(struct net *net, struct nlattr *nla,
			    unsigned int family, const void *cfg,
			    struct lwtunnel_state **ts,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
	struct seg6_iptunnel_encap *tuninfo;
	struct lwtunnel_state *newts;
	int tuninfo_len, min_size;
	struct seg6_lwt *slwt;
	int err;

	if (family != AF_INET && family != AF_INET6)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
					  seg6_iptunnel_policy, extack);

	if (err < 0)
		return err;

	if (!tb[SEG6_IPTUNNEL_SRH])
		return -EINVAL;

	tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
	tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);

	/* tuninfo must contain at least the iptunnel encap structure,
	 * the SRH and one segment
	 */
	min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
		   sizeof(struct in6_addr);
	if (tuninfo_len < min_size)
		return -EINVAL;

	switch (tuninfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		/* inline insertion only makes sense for IPv6 routes */
		if (family != AF_INET6)
			return -EINVAL;

		break;
	case SEG6_IPTUN_MODE_ENCAP:
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
		break;
	case SEG6_IPTUN_MODE_ENCAP_RED:
		break;
	case SEG6_IPTUN_MODE_L2ENCAP_RED:
		break;
	default:
		return -EINVAL;
	}

	/* verify that SRH is consistent */
	if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
		return -EINVAL;

	newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
	if (!newts)
		return -ENOMEM;

	slwt = seg6_lwt_lwtunnel(newts);

	err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
	if (err) {
		kfree(newts);
		return err;
	}

	memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);

	newts->type = LWTUNNEL_ENCAP_SEG6;
	newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;

	/* NOTE(review): only L2ENCAP is excluded from output redirection,
	 * while seg6_lwt_headroom() and seg6_do_srh() treat L2ENCAP_RED the
	 * same as L2ENCAP - confirm whether L2ENCAP_RED should be excluded
	 * here as well.
	 */
	if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;

	newts->headroom = seg6_lwt_headroom(tuninfo);

	*ts = newts;

	return 0;
}

/* lwtunnel .destroy_state handler: release the per-route dst cache. */
static void seg6_destroy_state(struct lwtunnel_state *lwt)
{
	dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
}

/* lwtunnel .fill_encap handler: dump the encap description as a
 * SEG6_IPTUNNEL_SRH attribute. Returns 0 or -EMSGSIZE.
 */
static int seg6_fill_encap_info(struct sk_buff *skb,
				struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);

	if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
		return -EMSGSIZE;

	return 0;
}

/* lwtunnel .get_encap_size handler: netlink space needed by
 * seg6_fill_encap_info() for this state.
 */
static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);

	return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
}

/* lwtunnel .cmp_encap handler: returns 0 when both states carry encap
 * descriptions of the same size and identical bytes, non-zero otherwise.
 */
static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
	struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
	int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);

	if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
		return 1;

	return memcmp(a_hdr, b_hdr, len);
}

/* Handlers registered for LWTUNNEL_ENCAP_SEG6 routes. */
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,
	.destroy_state = seg6_destroy_state,
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
	.owner = THIS_MODULE,
};

/* Register the seg6 encap ops; returns lwtunnel_encap_add_ops()'s result. */
int __init seg6_iptunnel_init(void)
{
	return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}

/* Unregister the seg6 encap ops. */
void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}
3 1 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 /* * Copyright (c) 2006 Oracle. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include <linux/percpu.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include "rds.h" #include "ib.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", "ib_evt_handler_call", "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", "ib_tx_throttle", "ib_tx_sg_mapping_failure", "ib_tx_stalled", "ib_tx_credit_updates", "ib_rx_cq_event", "ib_rx_ring_empty", "ib_rx_refill_from_cq", "ib_rx_refill_from_thread", "ib_rx_alloc_limit", "ib_rx_total_frags", "ib_rx_total_incs", "ib_rx_credit_updates", "ib_ack_sent", "ib_ack_send_failure", "ib_ack_send_delayed", "ib_ack_send_piggybacked", "ib_ack_received", "ib_rdma_mr_8k_alloc", "ib_rdma_mr_8k_free", "ib_rdma_mr_8k_used", "ib_rdma_mr_8k_pool_flush", "ib_rdma_mr_8k_pool_wait", "ib_rdma_mr_8k_pool_depleted", "ib_rdma_mr_1m_alloc", "ib_rdma_mr_1m_free", "ib_rdma_mr_1m_used", "ib_rdma_mr_1m_pool_flush", "ib_rdma_mr_1m_pool_wait", "ib_rdma_mr_1m_pool_depleted", "ib_rdma_mr_8k_reused", "ib_rdma_mr_1m_reused", "ib_atomic_cswp", "ib_atomic_fadd", }; unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, unsigned int avail) { struct rds_ib_statistics stats = {0, }; uint64_t *src; uint64_t *sum; size_t i; int cpu; if (avail < ARRAY_SIZE(rds_ib_stat_names)) goto out; for_each_online_cpu(cpu) { src = (uint64_t *)&(per_cpu(rds_ib_stats, cpu)); sum = (uint64_t *)&stats; for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) *(sum++) += *(src++); } rds_stats_info_copy(iter, (uint64_t *)&stats, rds_ib_stat_names, ARRAY_SIZE(rds_ib_stat_names)); out: return ARRAY_SIZE(rds_ib_stat_names); }
2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 // SPDX-License-Identifier: GPL-2.0+ /* * copyright (C) 1999/2000 by Henning Zabel <henning@uni-paderborn.de> */ /* * USB-Kernel Driver for the Mustek MDC800 Digital Camera * (c) 1999/2000 Henning Zabel <henning@uni-paderborn.de> * * * The driver brings the USB functions of the MDC800 to Linux. * To use the Camera you must support the USB Protocol of the camera * to the Kernel Node. * The Driver uses a misc device Node. Create it with : * mknod /dev/mustek c 180 32 * * The driver supports only one camera. * * Fix: mdc800 used sleep_on and slept with io_lock held. * Converted sleep_on to waitqueues with schedule_timeout and made io_lock * a semaphore from a spinlock. * by Oliver Neukum <oliver@neukum.name> * (02/12/2001) * * Identify version on module load. * (08/04/2001) gb * * version 0.7.5 * Fixed potential SMP races with Spinlocks. * Thanks to Oliver Neukum <oliver@neukum.name> who * noticed the race conditions. * (30/10/2000) * * Fixed: Setting urb->dev before submitting urb. * by Greg KH <greg@kroah.com> * (13/10/2000) * * version 0.7.3 * bugfix : The mdc800->state field gets set to READY after the * disconnect function sets it to NOT_CONNECTED. This makes the * driver running like the camera is connected and causes some * hang ups. * * version 0.7.1 * MOD_INC and MOD_DEC are changed in usb_probe to prevent load/unload * problems when compiled as Module. * (04/04/2000) * * The mdc800 driver gets assigned the USB Minor 32-47. The Registration * was updated to use these values. * (26/03/2000) * * The Init und Exit Module Function are updated. * (01/03/2000) * * version 0.7.0 * Rewrite of the driver : The driver now uses URB's. The old stuff * has been removed. 
* * version 0.6.0 * Rewrite of this driver: The Emulation of the rs232 protocoll * has been removed from the driver. A special executeCommand function * for this driver is included to gphoto. * The driver supports two kind of communication to bulk endpoints. * Either with the dev->bus->ops->bulk... or with callback function. * (09/11/1999) * * version 0.5.0: * first Version that gets a version number. Most of the needed * functions work. * (20/10/1999) */ #include <linux/sched/signal.h> #include <linux/signal.h> #include <linux/spinlock.h> #include <linux/errno.h> #include <linux/random.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/wait.h> #include <linux/mutex.h> #include <linux/usb.h> #include <linux/fs.h> /* * Version Information */ #define DRIVER_VERSION "v0.7.5 (30/10/2000)" #define DRIVER_AUTHOR "Henning Zabel <henning@uni-paderborn.de>" #define DRIVER_DESC "USB Driver for Mustek MDC800 Digital Camera" /* Vendor and Product Information */ #define MDC800_VENDOR_ID 0x055f #define MDC800_PRODUCT_ID 0xa800 /* Timeouts (msec) */ #define TO_DOWNLOAD_GET_READY 1500 #define TO_DOWNLOAD_GET_BUSY 1500 #define TO_WRITE_GET_READY 1000 #define TO_DEFAULT_COMMAND 5000 #define TO_READ_FROM_IRQ TO_DEFAULT_COMMAND #define TO_GET_READY TO_DEFAULT_COMMAND /* Minor Number of the device (create with mknod /dev/mustek c 180 32) */ #define MDC800_DEVICE_MINOR_BASE 32 /************************************************************************** Data and structs ***************************************************************************/ typedef enum { NOT_CONNECTED, READY, WORKING, DOWNLOAD } mdc800_state; /* Data for the driver */ struct mdc800_data { struct usb_device * dev; // Device Data mdc800_state state; unsigned int endpoint [4]; struct urb * irq_urb; wait_queue_head_t irq_wait; int irq_woken; char* irq_urb_buffer; int camera_busy; // is camera busy ? 
int camera_request_ready; // Status to synchronize with irq char camera_response [8]; // last Bytes send after busy struct urb * write_urb; char* write_urb_buffer; wait_queue_head_t write_wait; int written; struct urb * download_urb; char* download_urb_buffer; wait_queue_head_t download_wait; int downloaded; int download_left; // Bytes left to download ? /* Device Data */ char out [64]; // Answer Buffer int out_ptr; // Index to the first not readen byte int out_count; // Bytes in the buffer int open; // Camera device open ? struct mutex io_lock; // IO -lock char in [8]; // Command Input Buffer int in_count; int pic_index; // Cache for the Imagesize (-1 for nothing cached ) int pic_len; int minor; }; /* Specification of the Endpoints */ static struct usb_endpoint_descriptor mdc800_ed [4] = { { .bLength = 0, .bDescriptorType = 0, .bEndpointAddress = 0x01, .bmAttributes = 0x02, .wMaxPacketSize = cpu_to_le16(8), .bInterval = 0, .bRefresh = 0, .bSynchAddress = 0, }, { .bLength = 0, .bDescriptorType = 0, .bEndpointAddress = 0x82, .bmAttributes = 0x03, .wMaxPacketSize = cpu_to_le16(8), .bInterval = 0, .bRefresh = 0, .bSynchAddress = 0, }, { .bLength = 0, .bDescriptorType = 0, .bEndpointAddress = 0x03, .bmAttributes = 0x02, .wMaxPacketSize = cpu_to_le16(64), .bInterval = 0, .bRefresh = 0, .bSynchAddress = 0, }, { .bLength = 0, .bDescriptorType = 0, .bEndpointAddress = 0x84, .bmAttributes = 0x02, .wMaxPacketSize = cpu_to_le16(64), .bInterval = 0, .bRefresh = 0, .bSynchAddress = 0, }, }; /* The Variable used by the driver */ static struct mdc800_data* mdc800; /*************************************************************************** The USB Part of the driver ****************************************************************************/ static int mdc800_endpoint_equals (struct usb_endpoint_descriptor *a,struct usb_endpoint_descriptor *b) { return ( ( a->bEndpointAddress == b->bEndpointAddress ) && ( a->bmAttributes == b->bmAttributes ) && ( a->wMaxPacketSize == 
b->wMaxPacketSize ) ); } /* * Checks whether the camera responds busy */ static int mdc800_isBusy (char* ch) { int i=0; while (i<8) { if (ch [i] != (char)0x99) return 0; i++; } return 1; } /* * Checks whether the Camera is ready */ static int mdc800_isReady (char *ch) { int i=0; while (i<8) { if (ch [i] != (char)0xbb) return 0; i++; } return 1; } /* * USB IRQ Handler for InputLine */ static void mdc800_usb_irq (struct urb *urb) { int data_received=0, wake_up; unsigned char* b=urb->transfer_buffer; struct mdc800_data* mdc800=urb->context; struct device *dev = &mdc800->dev->dev; int status = urb->status; if (status >= 0) { if (mdc800_isBusy (b)) { if (!mdc800->camera_busy) { mdc800->camera_busy=1; dev_dbg(dev, "gets busy\n"); } } else { if (mdc800->camera_busy && mdc800_isReady (b)) { mdc800->camera_busy=0; dev_dbg(dev, "gets ready\n"); } } if (!(mdc800_isBusy (b) || mdc800_isReady (b))) { /* Store Data in camera_answer field */ dev_dbg(dev, "%i %i %i %i %i %i %i %i \n",b[0],b[1],b[2],b[3],b[4],b[5],b[6],b[7]); memcpy (mdc800->camera_response,b,8); data_received=1; } } wake_up= ( mdc800->camera_request_ready > 0 ) && ( ((mdc800->camera_request_ready == 1) && (!mdc800->camera_busy)) || ((mdc800->camera_request_ready == 2) && data_received) || ((mdc800->camera_request_ready == 3) && (mdc800->camera_busy)) || (status < 0) ); if (wake_up) { mdc800->camera_request_ready=0; mdc800->irq_woken=1; wake_up (&mdc800->irq_wait); } } /* * Waits a while until the irq responds that camera is ready * * mode : 0: Wait for camera gets ready * 1: Wait for receiving data * 2: Wait for camera gets busy * * msec: Time to wait */ static int mdc800_usb_waitForIRQ (int mode, int msec) { mdc800->camera_request_ready=1+mode; wait_event_timeout(mdc800->irq_wait, mdc800->irq_woken, msecs_to_jiffies(msec)); mdc800->irq_woken = 0; if (mdc800->camera_request_ready>0) { mdc800->camera_request_ready=0; dev_err(&mdc800->dev->dev, "timeout waiting for camera.\n"); return -1; } if (mdc800->state == 
NOT_CONNECTED) { printk(KERN_WARNING "mdc800: Camera gets disconnected " "during waiting for irq.\n"); mdc800->camera_request_ready=0; return -2; } return 0; } /* * The write_urb callback function */ static void mdc800_usb_write_notify (struct urb *urb) { struct mdc800_data* mdc800=urb->context; int status = urb->status; if (status != 0) dev_err(&mdc800->dev->dev, "writing command fails (status=%i)\n", status); else mdc800->state=READY; mdc800->written = 1; wake_up (&mdc800->write_wait); } /* * The download_urb callback function */ static void mdc800_usb_download_notify (struct urb *urb) { struct mdc800_data* mdc800=urb->context; int status = urb->status; if (status == 0) { /* Fill output buffer with these data */ memcpy (mdc800->out, urb->transfer_buffer, 64); mdc800->out_count=64; mdc800->out_ptr=0; mdc800->download_left-=64; if (mdc800->download_left == 0) { mdc800->state=READY; } } else { dev_err(&mdc800->dev->dev, "request bytes fails (status:%i)\n", status); } mdc800->downloaded = 1; wake_up (&mdc800->download_wait); } /*************************************************************************** Probing for the Camera ***************************************************************************/ static struct usb_driver mdc800_usb_driver; static const struct file_operations mdc800_device_ops; static struct usb_class_driver mdc800_class = { .name = "mdc800%d", .fops = &mdc800_device_ops, .minor_base = MDC800_DEVICE_MINOR_BASE, }; /* * Callback to search the Mustek MDC800 on the USB Bus */ static int mdc800_usb_probe (struct usb_interface *intf, const struct usb_device_id *id) { int i,j; struct usb_host_interface *intf_desc; struct usb_device *dev = interface_to_usbdev (intf); int irq_interval=0; int retval; dev_dbg(&intf->dev, "(%s) called.\n", __func__); if (mdc800->dev != NULL) { dev_warn(&intf->dev, "only one Mustek MDC800 is supported.\n"); return -ENODEV; } if (dev->descriptor.bNumConfigurations != 1) { dev_err(&intf->dev, "probe fails -> wrong Number of 
Configuration\n"); return -ENODEV; } intf_desc = intf->cur_altsetting; if ( ( intf_desc->desc.bInterfaceClass != 0xff ) || ( intf_desc->desc.bInterfaceSubClass != 0 ) || ( intf_desc->desc.bInterfaceProtocol != 0 ) || ( intf_desc->desc.bNumEndpoints != 4) ) { dev_err(&intf->dev, "probe fails -> wrong Interface\n"); return -ENODEV; } /* Check the Endpoints */ for (i=0; i<4; i++) { mdc800->endpoint[i]=-1; for (j=0; j<4; j++) { if (mdc800_endpoint_equals (&intf_desc->endpoint [j].desc,&mdc800_ed [i])) { mdc800->endpoint[i]=intf_desc->endpoint [j].desc.bEndpointAddress ; if (i==1) { irq_interval=intf_desc->endpoint [j].desc.bInterval; } } } if (mdc800->endpoint[i] == -1) { dev_err(&intf->dev, "probe fails -> Wrong Endpoints.\n"); return -ENODEV; } } dev_info(&intf->dev, "Found Mustek MDC800 on USB.\n"); mutex_lock(&mdc800->io_lock); retval = usb_register_dev(intf, &mdc800_class); if (retval) { dev_err(&intf->dev, "Not able to get a minor for this device.\n"); mutex_unlock(&mdc800->io_lock); return -ENODEV; } mdc800->dev=dev; mdc800->open=0; /* Setup URB Structs */ usb_fill_int_urb ( mdc800->irq_urb, mdc800->dev, usb_rcvintpipe (mdc800->dev,mdc800->endpoint [1]), mdc800->irq_urb_buffer, 8, mdc800_usb_irq, mdc800, irq_interval ); usb_fill_bulk_urb ( mdc800->write_urb, mdc800->dev, usb_sndbulkpipe (mdc800->dev, mdc800->endpoint[0]), mdc800->write_urb_buffer, 8, mdc800_usb_write_notify, mdc800 ); usb_fill_bulk_urb ( mdc800->download_urb, mdc800->dev, usb_rcvbulkpipe (mdc800->dev, mdc800->endpoint [3]), mdc800->download_urb_buffer, 64, mdc800_usb_download_notify, mdc800 ); mdc800->state=READY; mutex_unlock(&mdc800->io_lock); usb_set_intfdata(intf, mdc800); return 0; } /* * Disconnect USB device (maybe the MDC800) */ static void mdc800_usb_disconnect (struct usb_interface *intf) { struct mdc800_data* mdc800 = usb_get_intfdata(intf); dev_dbg(&intf->dev, "(%s) called\n", __func__); if (mdc800) { if (mdc800->state == NOT_CONNECTED) return; usb_deregister_dev(intf, 
&mdc800_class); /* must be under lock to make sure no URB is submitted after usb_kill_urb() */ mutex_lock(&mdc800->io_lock); mdc800->state=NOT_CONNECTED; usb_kill_urb(mdc800->irq_urb); usb_kill_urb(mdc800->write_urb); usb_kill_urb(mdc800->download_urb); mutex_unlock(&mdc800->io_lock); mdc800->dev = NULL; usb_set_intfdata(intf, NULL); } dev_info(&intf->dev, "Mustek MDC800 disconnected from USB.\n"); } /*************************************************************************** The Misc device Part (file_operations) ****************************************************************************/ /* * This Function calc the Answersize for a command. */ static int mdc800_getAnswerSize (char command) { switch ((unsigned char) command) { case 0x2a: case 0x49: case 0x51: case 0x0d: case 0x20: case 0x07: case 0x01: case 0x25: case 0x00: return 8; case 0x05: case 0x3e: return mdc800->pic_len; case 0x09: return 4096; default: return 0; } } /* * Init the device: (1) alloc mem (2) Increase MOD Count .. 
*/ static int mdc800_device_open (struct inode* inode, struct file *file) { int retval=0; int errn=0; mutex_lock(&mdc800->io_lock); if (mdc800->state == NOT_CONNECTED) { errn=-EBUSY; goto error_out; } if (mdc800->open) { errn=-EBUSY; goto error_out; } mdc800->in_count=0; mdc800->out_count=0; mdc800->out_ptr=0; mdc800->pic_index=0; mdc800->pic_len=-1; mdc800->download_left=0; mdc800->camera_busy=0; mdc800->camera_request_ready=0; mdc800->irq_urb->dev = mdc800->dev; retval = usb_submit_urb (mdc800->irq_urb, GFP_KERNEL); if (retval) { dev_err(&mdc800->dev->dev, "request USB irq fails (submit_retval=%i).\n", retval); errn = -EIO; goto error_out; } mdc800->open=1; dev_dbg(&mdc800->dev->dev, "Mustek MDC800 device opened.\n"); error_out: mutex_unlock(&mdc800->io_lock); return errn; } /* * Close the Camera and release Memory */ static int mdc800_device_release (struct inode* inode, struct file *file) { int retval=0; mutex_lock(&mdc800->io_lock); if (mdc800->open && (mdc800->state != NOT_CONNECTED)) { usb_kill_urb(mdc800->irq_urb); usb_kill_urb(mdc800->write_urb); usb_kill_urb(mdc800->download_urb); mdc800->open=0; } else { retval=-EIO; } mutex_unlock(&mdc800->io_lock); return retval; } /* * The Device read callback Function */ static ssize_t mdc800_device_read (struct file *file, char __user *buf, size_t len, loff_t *pos) { size_t left=len, sts=len; /* single transfer size */ char __user *ptr = buf; int retval; mutex_lock(&mdc800->io_lock); if (mdc800->state == NOT_CONNECTED) { mutex_unlock(&mdc800->io_lock); return -EBUSY; } if (mdc800->state == WORKING) { printk(KERN_WARNING "mdc800: Illegal State \"working\"" "reached during read ?!\n"); mutex_unlock(&mdc800->io_lock); return -EBUSY; } if (!mdc800->open) { mutex_unlock(&mdc800->io_lock); return -EBUSY; } while (left) { if (signal_pending (current)) { mutex_unlock(&mdc800->io_lock); return -EINTR; } sts=left > (mdc800->out_count-mdc800->out_ptr)?mdc800->out_count-mdc800->out_ptr:left; if (sts <= 0) { /* Too less Data in 
buffer */ if (mdc800->state == DOWNLOAD) { mdc800->out_count=0; mdc800->out_ptr=0; /* Download -> Request new bytes */ mdc800->download_urb->dev = mdc800->dev; retval = usb_submit_urb (mdc800->download_urb, GFP_KERNEL); if (retval) { dev_err(&mdc800->dev->dev, "Can't submit download urb " "(retval=%i)\n", retval); mutex_unlock(&mdc800->io_lock); return len-left; } wait_event_timeout(mdc800->download_wait, mdc800->downloaded, msecs_to_jiffies(TO_DOWNLOAD_GET_READY)); mdc800->downloaded = 0; if (mdc800->download_urb->status != 0) { dev_err(&mdc800->dev->dev, "request download-bytes fails " "(status=%i)\n", mdc800->download_urb->status); mutex_unlock(&mdc800->io_lock); return len-left; } } else { /* No more bytes -> that's an error*/ mutex_unlock(&mdc800->io_lock); return -EIO; } } else { /* Copy Bytes */ if (copy_to_user(ptr, &mdc800->out [mdc800->out_ptr], sts)) { mutex_unlock(&mdc800->io_lock); return -EFAULT; } ptr+=sts; left-=sts; mdc800->out_ptr+=sts; } } mutex_unlock(&mdc800->io_lock); return len-left; } /* * The Device write callback Function * If a 8Byte Command is received, it will be send to the camera. * After this the driver initiates the request for the answer or * just waits until the camera becomes ready. 
*/ static ssize_t mdc800_device_write (struct file *file, const char __user *buf, size_t len, loff_t *pos) { size_t i=0; int retval; mutex_lock(&mdc800->io_lock); if (mdc800->state != READY) { mutex_unlock(&mdc800->io_lock); return -EBUSY; } if (!mdc800->open ) { mutex_unlock(&mdc800->io_lock); return -EBUSY; } while (i<len) { unsigned char c; if (signal_pending (current)) { mutex_unlock(&mdc800->io_lock); return -EINTR; } if(get_user(c, buf+i)) { mutex_unlock(&mdc800->io_lock); return -EFAULT; } /* check for command start */ if (c == 0x55) { mdc800->in_count=0; mdc800->out_count=0; mdc800->out_ptr=0; mdc800->download_left=0; } /* save command byte */ if (mdc800->in_count < 8) { mdc800->in[mdc800->in_count] = c; mdc800->in_count++; } else { mutex_unlock(&mdc800->io_lock); return -EIO; } /* Command Buffer full ? -> send it to camera */ if (mdc800->in_count == 8) { int answersize; if (mdc800_usb_waitForIRQ (0,TO_GET_READY)) { dev_err(&mdc800->dev->dev, "Camera didn't get ready.\n"); mutex_unlock(&mdc800->io_lock); return -EIO; } answersize=mdc800_getAnswerSize (mdc800->in[1]); mdc800->state=WORKING; memcpy (mdc800->write_urb->transfer_buffer, mdc800->in,8); mdc800->write_urb->dev = mdc800->dev; retval = usb_submit_urb (mdc800->write_urb, GFP_KERNEL); if (retval) { dev_err(&mdc800->dev->dev, "submitting write urb fails " "(retval=%i)\n", retval); mutex_unlock(&mdc800->io_lock); return -EIO; } wait_event_timeout(mdc800->write_wait, mdc800->written, msecs_to_jiffies(TO_WRITE_GET_READY)); mdc800->written = 0; if (mdc800->state == WORKING) { usb_kill_urb(mdc800->write_urb); mutex_unlock(&mdc800->io_lock); return -EIO; } switch ((unsigned char) mdc800->in[1]) { case 0x05: /* Download Image */ case 0x3e: /* Take shot in Fine Mode (WCam Mode) */ if (mdc800->pic_len < 0) { dev_err(&mdc800->dev->dev, "call 0x07 before " "0x05,0x3e\n"); mdc800->state=READY; mutex_unlock(&mdc800->io_lock); return -EIO; } mdc800->pic_len=-1; fallthrough; case 0x09: /* Download Thumbnail */ 
mdc800->download_left=answersize+64; mdc800->state=DOWNLOAD; mdc800_usb_waitForIRQ (0,TO_DOWNLOAD_GET_BUSY); break; default: if (answersize) { if (mdc800_usb_waitForIRQ (1,TO_READ_FROM_IRQ)) { dev_err(&mdc800->dev->dev, "requesting answer from irq fails\n"); mutex_unlock(&mdc800->io_lock); return -EIO; } /* Write dummy data, (this is ugly but part of the USB Protocol */ /* if you use endpoint 1 as bulk and not as irq) */ memcpy (mdc800->out, mdc800->camera_response,8); /* This is the interpreted answer */ memcpy (&mdc800->out[8], mdc800->camera_response,8); mdc800->out_ptr=0; mdc800->out_count=16; /* Cache the Imagesize, if command was getImageSize */ if (mdc800->in [1] == (char) 0x07) { mdc800->pic_len=(int) 65536*(unsigned char) mdc800->camera_response[0]+256*(unsigned char) mdc800->camera_response[1]+(unsigned char) mdc800->camera_response[2]; dev_dbg(&mdc800->dev->dev, "cached imagesize = %i\n", mdc800->pic_len); } } else { if (mdc800_usb_waitForIRQ (0,TO_DEFAULT_COMMAND)) { dev_err(&mdc800->dev->dev, "Command Timeout.\n"); mutex_unlock(&mdc800->io_lock); return -EIO; } } mdc800->state=READY; break; } } i++; } mutex_unlock(&mdc800->io_lock); return i; } /*************************************************************************** Init and Cleanup this driver (Structs and types) ****************************************************************************/ /* File Operations of this drivers */ static const struct file_operations mdc800_device_ops = { .owner = THIS_MODULE, .read = mdc800_device_read, .write = mdc800_device_write, .open = mdc800_device_open, .release = mdc800_device_release, .llseek = noop_llseek, }; static const struct usb_device_id mdc800_table[] = { { USB_DEVICE(MDC800_VENDOR_ID, MDC800_PRODUCT_ID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE (usb, mdc800_table); /* * USB Driver Struct for this device */ static struct usb_driver mdc800_usb_driver = { .name = "mdc800", .probe = mdc800_usb_probe, .disconnect = mdc800_usb_disconnect, 
.id_table = mdc800_table }; /************************************************************************ Init and Cleanup this driver (Main Functions) *************************************************************************/ static int __init usb_mdc800_init (void) { int retval = -ENODEV; /* Allocate Memory */ mdc800=kzalloc (sizeof (struct mdc800_data), GFP_KERNEL); if (!mdc800) goto cleanup_on_fail; mdc800->dev = NULL; mdc800->state=NOT_CONNECTED; mutex_init (&mdc800->io_lock); init_waitqueue_head (&mdc800->irq_wait); init_waitqueue_head (&mdc800->write_wait); init_waitqueue_head (&mdc800->download_wait); mdc800->irq_woken = 0; mdc800->downloaded = 0; mdc800->written = 0; mdc800->irq_urb_buffer=kmalloc (8, GFP_KERNEL); if (!mdc800->irq_urb_buffer) goto cleanup_on_fail; mdc800->write_urb_buffer=kmalloc (8, GFP_KERNEL); if (!mdc800->write_urb_buffer) goto cleanup_on_fail; mdc800->download_urb_buffer=kmalloc (64, GFP_KERNEL); if (!mdc800->download_urb_buffer) goto cleanup_on_fail; mdc800->irq_urb=usb_alloc_urb (0, GFP_KERNEL); if (!mdc800->irq_urb) goto cleanup_on_fail; mdc800->download_urb=usb_alloc_urb (0, GFP_KERNEL); if (!mdc800->download_urb) goto cleanup_on_fail; mdc800->write_urb=usb_alloc_urb (0, GFP_KERNEL); if (!mdc800->write_urb) goto cleanup_on_fail; /* Register the driver */ retval = usb_register(&mdc800_usb_driver); if (retval) goto cleanup_on_fail; printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_VERSION ":" DRIVER_DESC "\n"); return 0; /* Clean driver up, when something fails */ cleanup_on_fail: if (mdc800 != NULL) { printk(KERN_ERR "mdc800: can't alloc memory!\n"); kfree(mdc800->download_urb_buffer); kfree(mdc800->write_urb_buffer); kfree(mdc800->irq_urb_buffer); usb_free_urb(mdc800->write_urb); usb_free_urb(mdc800->download_urb); usb_free_urb(mdc800->irq_urb); kfree (mdc800); } mdc800 = NULL; return retval; } static void __exit usb_mdc800_cleanup (void) { usb_deregister (&mdc800_usb_driver); usb_free_urb (mdc800->irq_urb); usb_free_urb 
(mdc800->download_urb); usb_free_urb (mdc800->write_urb); kfree (mdc800->irq_urb_buffer); kfree (mdc800->write_urb_buffer); kfree (mdc800->download_urb_buffer); kfree (mdc800); mdc800 = NULL; } module_init (usb_mdc800_init); module_exit (usb_mdc800_cleanup); MODULE_AUTHOR( DRIVER_AUTHOR ); MODULE_DESCRIPTION( DRIVER_DESC ); MODULE_LICENSE("GPL");
10 1 9 20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 // SPDX-License-Identifier: GPL-2.0-or-later /* AFS security handling * * Copyright (C) 2007, 2017 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/ctype.h> #include <linux/sched.h> #include <linux/hashtable.h> #include <keys/rxrpc-type.h> #include "internal.h" static DEFINE_HASHTABLE(afs_permits_cache, 10); static DEFINE_SPINLOCK(afs_permits_lock); /* * get a key */ struct key *afs_request_key(struct afs_cell *cell) { struct key *key; _enter("{%x}", key_serial(cell->anonymous_key)); _debug("key %s", cell->anonymous_key->description); key = request_key_net(&key_type_rxrpc, cell->anonymous_key->description, cell->net->net, NULL); if (IS_ERR(key)) { if (PTR_ERR(key) != -ENOKEY) { _leave(" = %ld", PTR_ERR(key)); return key; } /* act as anonymous user */ _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); return key_get(cell->anonymous_key); } else { /* act as authorised user */ _leave(" = {%x} [auth]", key_serial(key)); return key; } } /* * Get a key when pathwalk is in rcuwalk mode. */ struct key *afs_request_key_rcu(struct afs_cell *cell) { struct key *key; _enter("{%x}", key_serial(cell->anonymous_key)); _debug("key %s", cell->anonymous_key->description); key = request_key_net_rcu(&key_type_rxrpc, cell->anonymous_key->description, cell->net->net); if (IS_ERR(key)) { if (PTR_ERR(key) != -ENOKEY) { _leave(" = %ld", PTR_ERR(key)); return key; } /* act as anonymous user */ _leave(" = {%x} [anon]", key_serial(cell->anonymous_key)); return key_get(cell->anonymous_key); } else { /* act as authorised user */ _leave(" = {%x} [auth]", key_serial(key)); return key; } } /* * Dispose of a list of permits. */ static void afs_permits_rcu(struct rcu_head *rcu) { struct afs_permits *permits = container_of(rcu, struct afs_permits, rcu); int i; for (i = 0; i < permits->nr_permits; i++) key_put(permits->permits[i].key); kfree(permits); } /* * Discard a permission cache. 
*/
/*
 * Drops one reference on @permits (NULL is accepted).  When the last
 * reference goes, the list is unhashed under afs_permits_lock and handed to
 * call_rcu() so that afs_permits_rcu() frees it.
 */
void afs_put_permits(struct afs_permits *permits)
{
	if (permits && refcount_dec_and_test(&permits->usage)) {
		spin_lock(&afs_permits_lock);
		hash_del_rcu(&permits->hash_node);
		spin_unlock(&afs_permits_lock);
		call_rcu(&permits->rcu, afs_permits_rcu);
	}
}

/*
 * Clear a permit cache on callback break.
 *
 * The vnode's permit_cache pointer is detached under vnode->lock and the
 * reference it held is dropped afterwards, outside the lock.
 */
void afs_clear_permits(struct afs_vnode *vnode)
{
	struct afs_permits *permits;

	spin_lock(&vnode->lock);
	permits = rcu_dereference_protected(vnode->permit_cache,
					    lockdep_is_held(&vnode->lock));
	RCU_INIT_POINTER(vnode->permit_cache, NULL);
	spin_unlock(&vnode->lock);

	afs_put_permits(permits);
}

/*
 * Hash a list of permits.  Use simple addition to make it easy to add an extra
 * one at an as-yet indeterminate position in the list.
 *
 * The result is stored in permits->h.
 */
static void afs_hash_permits(struct afs_permits *permits)
{
	/* Seed with the entry count, then fold in every key pointer/access pair */
	unsigned long h = permits->nr_permits;
	int i;

	for (i = 0; i < permits->nr_permits; i++) {
		h += (unsigned long)permits->permits[i].key / sizeof(void *);
		h += permits->permits[i].access;
	}

	permits->h = h;
}

/*
 * Cache the CallerAccess result obtained from doing a fileserver operation
 * that returned a vnode status for a particular key.  If a callback break
 * occurs whilst the operation was in progress then we have to ditch the cache
 * as the ACL *may* have changed.
 *
 * @vnode:    vnode whose permit cache is updated
 * @key:      key the access was obtained for
 * @cb_break: callback-break value passed to afs_cb_is_broken() to detect a
 *            break that happened in the meantime
 * @scb:      status block; scb->status.caller_access is the value recorded
 */
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
		      unsigned int cb_break, struct afs_status_cb *scb)
{
	struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
	afs_access_t caller_access = scb->status.caller_access;
	size_t size = 0;
	bool changed = false;
	int i, j;

	_enter("{%llx:%llu},%x,%x",
	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);

	rcu_read_lock();

	/* Check for the common case first: We got back the same access as last
	 * time we tried and already have it recorded.
	 */
	permits = rcu_dereference(vnode->permit_cache);
	if (permits) {
		if (!permits->invalidated) {
			/* Entries are ordered by key pointer value, so the
			 * scan can stop at the first larger key.
			 */
			for (i = 0; i < permits->nr_permits; i++) {
				if (permits->permits[i].key < key)
					continue;
				if (permits->permits[i].key > key)
					break;
				if (permits->permits[i].access != caller_access) {
					changed = true;
					break;
				}

				if (afs_cb_is_broken(cb_break, vnode)) {
					changed = true;
					break;
				}

				/* The cache is still good. */
				rcu_read_unlock();
				return;
			}
		}

		changed |= permits->invalidated;
		size = permits->nr_permits;

		/* If this set of permits is now wrong, clear the permits
		 * pointer so that no one tries to use the stale information.
		 */
		if (changed) {
			spin_lock(&vnode->lock);
			if (permits != rcu_access_pointer(vnode->permit_cache))
				goto someone_else_changed_it_unlock;
			RCU_INIT_POINTER(vnode->permit_cache, NULL);
			spin_unlock(&vnode->lock);

			afs_put_permits(permits);
			permits = NULL;
			size = 0;
		}
	}

	if (afs_cb_is_broken(cb_break, vnode))
		goto someone_else_changed_it;

	/* We need a ref on any permits list we want to copy as we'll have to
	 * drop the lock to do memory allocation.
	 */
	if (permits && !refcount_inc_not_zero(&permits->usage))
		goto someone_else_changed_it;

	rcu_read_unlock();

	/* Speculatively create a new list with the revised permission set.  We
	 * discard this if we find an extant match already in the hash, but
	 * it's easier to compare with memcmp this way.
	 *
	 * We fill in the key pointers at this time, but we don't get the refs
	 * yet.
	 */
	size++;
	new = kzalloc(struct_size(new, permits, size), GFP_NOFS);
	if (!new)
		goto out_put;

	refcount_set(&new->usage, 1);
	new->nr_permits = size;
	i = j = 0;
	if (permits) {
		/* Copy the old entries, slotting the new key in front of the
		 * first entry with a larger key; j == i means it has not been
		 * inserted yet.
		 */
		for (i = 0; i < permits->nr_permits; i++) {
			if (j == i && permits->permits[i].key > key) {
				new->permits[j].key = key;
				new->permits[j].access = caller_access;
				j++;
			}
			new->permits[j].key = permits->permits[i].key;
			new->permits[j].access = permits->permits[i].access;
			j++;
		}
	}

	/* Not inserted during the copy: the new key goes at the end */
	if (j == i) {
		new->permits[j].key = key;
		new->permits[j].access = caller_access;
	}

	afs_hash_permits(new);

	/* Now see if the permit list we want is actually already available */
	spin_lock(&afs_permits_lock);

	hash_for_each_possible(afs_permits_cache, xpermits, hash_node, new->h) {
		if (xpermits->h != new->h ||
		    xpermits->invalidated ||
		    xpermits->nr_permits != new->nr_permits ||
		    memcmp(xpermits->permits, new->permits,
			   new->nr_permits * sizeof(struct afs_permit)) != 0)
			continue;

		if (refcount_inc_not_zero(&xpermits->usage)) {
			replacement = xpermits;
			goto found;
		}

		/* A matching list whose refcount already hit zero cannot be
		 * reused; stop searching and publish our own copy.
		 */
		break;
	}

	/* No reusable list: take the key refs now and hash the new one */
	for (i = 0; i < new->nr_permits; i++)
		key_get(new->permits[i].key);
	hash_add_rcu(afs_permits_cache, &new->hash_node, new->h);
	replacement = new;
	new = NULL;

found:
	spin_unlock(&afs_permits_lock);

	/* Frees the speculative list if an existing one was reused; new is
	 * NULL otherwise.
	 */
	kfree(new);

	rcu_read_lock();
	spin_lock(&vnode->lock);
	zap = rcu_access_pointer(vnode->permit_cache);
	/* Install the replacement only if nothing changed meanwhile;
	 * otherwise the replacement itself is what gets dropped.
	 */
	if (!afs_cb_is_broken(cb_break, vnode) && zap == permits)
		rcu_assign_pointer(vnode->permit_cache, replacement);
	else
		zap = replacement;
	spin_unlock(&vnode->lock);
	rcu_read_unlock();
	afs_put_permits(zap);
out_put:
	/* Drop the ref taken on the list we copied from (may be NULL) */
	afs_put_permits(permits);
	return;

someone_else_changed_it_unlock:
	spin_unlock(&vnode->lock);
someone_else_changed_it:
	/* Someone else changed the cache under us - don't recheck at this
	 * time.
	 */
	rcu_read_unlock();
	return;
}

/*
 * Look up the access recorded for @key in the vnode's permit cache without
 * blocking.  The anonymous key is answered from vnode->status.anon_access.
 *
 * Returns true with *_access set when a usable entry exists; returns false
 * when there is no entry or the permit list is marked invalidated (in the
 * latter case *_access has still been written).
 */
static bool afs_check_permit_rcu(struct afs_vnode *vnode, struct key *key,
				 afs_access_t *_access)
{
	const struct afs_permits *permits;
	int i;

	_enter("{%llx:%llu},%x",
	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));

	/* check the permits to see if we've got one yet */
	if (key == vnode->volume->cell->anonymous_key) {
		*_access = vnode->status.anon_access;
		_leave(" = t [anon %x]", *_access);
		return true;
	}

	permits = rcu_dereference(vnode->permit_cache);
	if (permits) {
		for (i = 0; i < permits->nr_permits; i++) {
			if (permits->permits[i].key < key)
				continue;
			if (permits->permits[i].key > key)
				break;

			*_access = permits->permits[i].access;
			_leave(" = %u [perm %x]", !permits->invalidated, *_access);
			return !permits->invalidated;
		}
	}

	_leave(" = f");
	return false;
}

/*
 * check with the fileserver to see if the directory or parent directory is
 * permitted to be accessed with this authorisation, and if so, what access it
 * is granted
 *
 * The cached permit is used when one is valid; otherwise the status is
 * fetched with afs_fetch_status().  Returns 0 with *_access set, or the
 * negative error from afs_fetch_status() with *_access cleared to 0.
 */
int afs_check_permit(struct afs_vnode *vnode, struct key *key,
		     afs_access_t *_access)
{
	struct afs_permits *permits;
	bool valid = false;
	int i, ret;

	_enter("{%llx:%llu},%x",
	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));

	/* check the permits to see if we've got one yet */
	if (key == vnode->volume->cell->anonymous_key) {
		_debug("anon");
		*_access = vnode->status.anon_access;
		valid = true;
	} else {
		rcu_read_lock();
		permits = rcu_dereference(vnode->permit_cache);
		if (permits) {
			for (i = 0; i < permits->nr_permits; i++) {
				if (permits->permits[i].key < key)
					continue;
				if (permits->permits[i].key > key)
					break;

				*_access = permits->permits[i].access;
				valid = !permits->invalidated;
				break;
			}
		}
		rcu_read_unlock();
	}

	if (!valid) {
		/* Check the status on the file we're actually interested in
		 * (the post-processing will cache the result).
		 */
		_debug("no valid permit");

		ret = afs_fetch_status(vnode, key, false, _access);
		if (ret < 0) {
			*_access = 0;
			_leave(" = %d", ret);
			return ret;
		}
	}

	_leave(" = 0 [access %x]", *_access);
	return 0;
}

/*
 * check the permissions on an AFS file
 * - AFS ACLs are attached to directories only, and a file is controlled by its
 *   parent directory's ACL
 *
 * Returns 0 if the requested @mask is allowed, -EACCES if it is denied,
 * -ECHILD if the answer cannot be given under MAY_NOT_BLOCK, or another
 * negative error from key lookup, validation or the permit check.
 */
int afs_permission(struct mnt_idmap *idmap, struct inode *inode,
		   int mask)
{
	struct afs_vnode *vnode = AFS_FS_I(inode);
	afs_access_t access;
	struct key *key;
	int ret = 0;

	_enter("{{%llx:%llu},%lx},%x,",
	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);

	if (mask & MAY_NOT_BLOCK) {
		/* Non-blocking walk: only cached information may be used */
		key = afs_request_key_rcu(vnode->volume->cell);
		if (IS_ERR(key))
			return -ECHILD;

		ret = -ECHILD;
		if (!afs_check_validity(vnode) ||
		    !afs_check_permit_rcu(vnode, key, &access))
			goto error;
	} else {
		key = afs_request_key(vnode->volume->cell);
		if (IS_ERR(key)) {
			_leave(" = %ld [key]", PTR_ERR(key));
			return PTR_ERR(key);
		}

		ret = afs_validate(vnode, key);
		if (ret < 0)
			goto error;

		/* check the permits to see if we've got one yet */
		ret = afs_check_permit(vnode, key, &access);
		if (ret < 0)
			goto error;
	}

	/* interpret the access mask */
	_debug("REQ %x ACC %x on %s",
	       mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");

	ret = 0;
	if (S_ISDIR(inode->i_mode)) {
		if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
			if (!(access & AFS_ACE_LOOKUP))
				goto permission_denied;
		}
		if (mask & MAY_WRITE) {
			if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
					AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
				goto permission_denied;
		}
	} else {
		/* Files need LOOKUP plus both the ACL right and the matching
		 * owner mode bit for the requested operation.
		 */
		if (!(access & AFS_ACE_LOOKUP))
			goto permission_denied;
		if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR))
			goto permission_denied;
		if (mask & (MAY_EXEC | MAY_READ)) {
			if (!(access & AFS_ACE_READ))
				goto permission_denied;
			if (!(inode->i_mode & S_IRUSR))
				goto permission_denied;
		} else if (mask & MAY_WRITE) {
			if (!(access & AFS_ACE_WRITE))
				goto permission_denied;
			if (!(inode->i_mode & S_IWUSR))
				goto permission_denied;
		}
	}

	key_put(key);
	_leave(" = %d", ret);
	return ret;

permission_denied:
	ret = -EACCES;
error:
	key_put(key);
	_leave(" = %d", ret);
	return ret;
}

/*
 * Module-exit check: warn (once) if any bucket of the permits hash table
 * still holds an entry.
 */
void __exit afs_clean_up_permit_cache(void)
{
	int i;

	for (i = 0; i < HASH_SIZE(afs_permits_cache); i++)
		WARN_ON_ONCE(!hlist_empty(&afs_permits_cache[i]));

}
7909 7928 1273 1207 1273 1184 1205 368 2 1420 141 1209 233 1580 214 1420 1582 270 237 1177 1175 1141 244 244 244 2029 1604 481 236 1845 538 1580 1580 1582 1582 1580 2030 2031 2030 679 1383 82 2000 40 203 7 196 59 2032 156 4 3643 7 3142 3145 255 254 1996 1997 1835 4516 2389 408 2029 3432 437 77 436 34 33 34 28 4 8 1 8 2 2 2 11 7 4 7 7 7 7 383 381 381 350 385 384 4 8 17 9 17 21 383 7 381 363 363 382 5439 5438 5441 391 391 354 389 389 385 12 385 25 385 2 391 390 391 7 390 391 389 390 391 388 373 391 390 391 390 14 389 389 391 391 1 391 389 367 391 394 14 365 389 391 391 390 391 391 391 388 366 390 33 1548 3 307 257 119 306 307 1420 16 119 1558 1598 671 672 1599 1600 67 1582 13 8 8 5 2 23 1601 1339 1335 238 1619 28 568 1730 1382 1728 119 1727 1711 119 1716 12 9 2 24 2003 40 2 2 1514 1730 2008 2006 4 2009 2006 2206 333 2009 2209 2210 2206 2210 2211 2211 2211 1 2208 2002 534 24 72 2207 2089 2089 2090 2093 2093 2090 2032 210 210 210 210 207 210 210 206 7 42 1 1 3316 3316 3322 3312 3315 3313 3132 3213 402 401 401 402 403 1 403 403 402 402 403 79 80 79 324 231 324 1 1 323 324 324 3 321 321 324 79 74 25 25 4 4 1 4 4 4 4 4 4 4 4 12 12 12 12 12 12 5 12 12 12 12 6 1 6 12 12 12 12 12 13 13 1 13 4 4 9 6185 6188 6182 46 46 6226 6225 6242 6230 6183 6188 6187 46 6245 6228 6233 6187 6185 6184 46 6245 6231 6185 6199 6187 46 6245 6231 6229 6232 6229 6225 6243 6245 6225 6189 46 102 2 2 8811 2718 178 178 7 1 171 373 373 373 172 210 31 341 65 341 262 260 262 262 261 715 6 125 4947 724 857 723 857 2107 1641 478 2103 486 1640 479 2 2103 1 468 8 1638 2103 14 2086 2103 2102 1637 478 2099 22 2107 1636 490 1641 10 478 478 2105 90 90 1 90 1 96 96 6 90 1 1 91 7 5 3 2 3 3 3 3 3 3 3 7 23 472 108 479 154 3 3 155 2222 15 2207 6 2202 3 604 1731 97 97 11 500 173 157 3 171 2104 476 1 2107 162 162 41 41 35 35 897 1005 125 897 8 59 8309 2675 16 3 13 102 102 10 16 39 69 69 3 69 76 7 76 59 30 12 76 4 76 76 76 76 7 70 1 76 76 76 2 7 69 6 53 31 73 73 6 76 76 76 75 6 75 76 76 4 8 8 8 2 13 4169 4166 8295 5012 
4992 4995 8 4189 966 4191 4174 4191 1 4176 4178 9 4178 8272 17 996 880 242 412 740 58 683 53 6 48 45 45 3 45 2524 2530 371 2206 61 301 70 2531 741 401 373 739 634 107 609 435 10 437 738 263 475 4 4 2 737 2 2 735 736 2136 450 2272 2277 65 7 72 2277 2276 660 338 402 398 17 2 84 77 22 70 70 2 80 303 210 103 1 5 303 3 2 2340 84 315 2604 186 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 66 20 15 2 1 2 3082 9035 10014 2249 98 1 2 5 23 2230 1 15 40 10498 10495 10493 10524 10490 9899 1031 10490 10493 9785 3121 66 66 10469 22 4 20 19 10497 10612 362 1396 195 9339 10624 18 7303 5884 10621 10620 10624 10653 8626 2326 10624 187 10486 8622 2322 2324