Total coverage: 218596 (15%) of 1548827
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * USB SD Host Controller (USHC) controller driver.
 *
 * Copyright (C) 2010 Cambridge Silicon Radio Ltd.
 *
 * Notes:
 *   - Only version 2 devices are supported.
 *   - Version 2 devices only support SDIO cards/devices (R2 response is
 *     unsupported).
* * References: * [USHC] USB SD Host Controller specification (CS-118793-SP) */ #include <linux/module.h> #include <linux/usb.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/mmc/host.h> enum ushc_request { USHC_GET_CAPS = 0x00, USHC_HOST_CTRL = 0x01, USHC_PWR_CTRL = 0x02, USHC_CLK_FREQ = 0x03, USHC_EXEC_CMD = 0x04, USHC_READ_RESP = 0x05, USHC_RESET = 0x06, }; enum ushc_request_type { USHC_GET_CAPS_TYPE = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_HOST_CTRL_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_PWR_CTRL_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_CLK_FREQ_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_EXEC_CMD_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_READ_RESP_TYPE = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_RESET_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, }; #define USHC_GET_CAPS_VERSION_MASK 0xff #define USHC_GET_CAPS_3V3 (1 << 8) #define USHC_GET_CAPS_3V0 (1 << 9) #define USHC_GET_CAPS_1V8 (1 << 10) #define USHC_GET_CAPS_HIGH_SPD (1 << 16) #define USHC_HOST_CTRL_4BIT (1 << 1) #define USHC_HOST_CTRL_HIGH_SPD (1 << 0) #define USHC_PWR_CTRL_OFF 0x00 #define USHC_PWR_CTRL_3V3 0x01 #define USHC_PWR_CTRL_3V0 0x02 #define USHC_PWR_CTRL_1V8 0x03 #define USHC_READ_RESP_BUSY (1 << 4) #define USHC_READ_RESP_ERR_TIMEOUT (1 << 3) #define USHC_READ_RESP_ERR_CRC (1 << 2) #define USHC_READ_RESP_ERR_DAT (1 << 1) #define USHC_READ_RESP_ERR_CMD (1 << 0) #define USHC_READ_RESP_ERR_MASK 0x0f struct ushc_cbw { __u8 signature; __u8 cmd_idx; __le16 block_size; __le32 arg; } __attribute__((packed)); #define USHC_CBW_SIGNATURE 'C' struct ushc_csw { __u8 signature; __u8 status; __le32 response; } __attribute__((packed)); #define USHC_CSW_SIGNATURE 'S' struct ushc_int_data { u8 status; u8 reserved[3]; }; #define USHC_INT_STATUS_SDIO_INT (1 << 1) #define USHC_INT_STATUS_CARD_PRESENT (1 << 0) struct ushc_data { struct usb_device *usb_dev; struct mmc_host *mmc; struct urb *int_urb; struct ushc_int_data *int_data; struct urb *cbw_urb; struct ushc_cbw *cbw; struct urb *data_urb; struct urb *csw_urb; struct ushc_csw *csw; spinlock_t lock; struct mmc_request *current_req; u32 caps; u16 host_ctrl; unsigned long flags; u8 last_status; int clock_freq; }; #define DISCONNECTED 0 #define INT_EN 1 #define IGNORE_NEXT_INT 2 static void data_callback(struct urb *urb); static int ushc_hw_reset(struct ushc_data *ushc) { return usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_RESET, USHC_RESET_TYPE, 0, 0, NULL, 0, 100); } static int ushc_hw_get_caps(struct ushc_data *ushc) { int ret; int version; ret = usb_control_msg(ushc->usb_dev, usb_rcvctrlpipe(ushc->usb_dev, 0), USHC_GET_CAPS, USHC_GET_CAPS_TYPE, 0, 0, &ushc->caps, sizeof(ushc->caps), 100); if (ret < 0) return ret; ushc->caps = le32_to_cpu(ushc->caps); version = ushc->caps & USHC_GET_CAPS_VERSION_MASK; if (version != 0x02) { dev_err(&ushc->usb_dev->dev, "controller version %d is not supported\n", version); return -EINVAL; } return 0; } static int ushc_hw_set_host_ctrl(struct ushc_data *ushc, u16 mask, u16 val) { u16 host_ctrl; int ret; host_ctrl = (ushc->host_ctrl & ~mask) | val; ret = usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_HOST_CTRL, USHC_HOST_CTRL_TYPE, host_ctrl, 0, NULL, 0, 100); if (ret < 0) return ret; ushc->host_ctrl = host_ctrl; return 0; } static void int_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; u8 status, last_status; if 
(urb->status < 0) return; status = ushc->int_data->status; last_status = ushc->last_status; ushc->last_status = status; /* * Ignore the card interrupt status on interrupt transfers that * were submitted while card interrupts where disabled. * * This avoid occasional spurious interrupts when enabling * interrupts immediately after clearing the source on the card. */ if (!test_and_clear_bit(IGNORE_NEXT_INT, &ushc->flags) && test_bit(INT_EN, &ushc->flags) && status & USHC_INT_STATUS_SDIO_INT) { mmc_signal_sdio_irq(ushc->mmc); } if ((status ^ last_status) & USHC_INT_STATUS_CARD_PRESENT) mmc_detect_change(ushc->mmc, msecs_to_jiffies(100)); if (!test_bit(INT_EN, &ushc->flags)) set_bit(IGNORE_NEXT_INT, &ushc->flags); usb_submit_urb(ushc->int_urb, GFP_ATOMIC); } static void cbw_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; if (urb->status != 0) { usb_unlink_urb(ushc->data_urb); usb_unlink_urb(ushc->csw_urb); } } static void data_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; if (urb->status != 0) usb_unlink_urb(ushc->csw_urb); } static void csw_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; struct mmc_request *req = ushc->current_req; int status; status = ushc->csw->status; if (urb->status != 0) { req->cmd->error = urb->status; } else if (status & USHC_READ_RESP_ERR_CMD) { if (status & USHC_READ_RESP_ERR_CRC) req->cmd->error = -EIO; else req->cmd->error = -ETIMEDOUT; } if (req->data) { if (status & USHC_READ_RESP_ERR_DAT) { if (status & USHC_READ_RESP_ERR_CRC) req->data->error = -EIO; else req->data->error = -ETIMEDOUT; req->data->bytes_xfered = 0; } else { req->data->bytes_xfered = req->data->blksz * req->data->blocks; } } req->cmd->resp[0] = le32_to_cpu(ushc->csw->response); mmc_request_done(ushc->mmc, req); } static void ushc_request(struct mmc_host *mmc, struct mmc_request *req) { struct ushc_data *ushc = mmc_priv(mmc); int ret; unsigned long flags; spin_lock_irqsave(&ushc->lock, flags); if (test_bit(DISCONNECTED, &ushc->flags)) { ret = -ENODEV; goto out; } /* Version 2 firmware doesn't support the R2 response format. */ if (req->cmd->flags & MMC_RSP_136) { ret = -EINVAL; goto out; } /* The Astoria's data FIFOs don't work with clock speeds < 5MHz so limit commands with data to 6MHz or more. */ if (req->data && ushc->clock_freq < 6000000) { ret = -EINVAL; goto out; } ushc->current_req = req; /* Start cmd with CBW. */ ushc->cbw->cmd_idx = cpu_to_le16(req->cmd->opcode); if (req->data) ushc->cbw->block_size = cpu_to_le16(req->data->blksz); else ushc->cbw->block_size = 0; ushc->cbw->arg = cpu_to_le32(req->cmd->arg); ret = usb_submit_urb(ushc->cbw_urb, GFP_ATOMIC); if (ret < 0) goto out; /* Submit data (if any). */ if (req->data) { struct mmc_data *data = req->data; int pipe; if (data->flags & MMC_DATA_READ) pipe = usb_rcvbulkpipe(ushc->usb_dev, 6); else pipe = usb_sndbulkpipe(ushc->usb_dev, 2); usb_fill_bulk_urb(ushc->data_urb, ushc->usb_dev, pipe, NULL, data->sg->length, data_callback, ushc); ushc->data_urb->num_sgs = 1; ushc->data_urb->sg = data->sg; ret = usb_submit_urb(ushc->data_urb, GFP_ATOMIC); if (ret < 0) goto out; } /* Submit CSW. 
*/ ret = usb_submit_urb(ushc->csw_urb, GFP_ATOMIC); out: spin_unlock_irqrestore(&ushc->lock, flags); if (ret < 0) { usb_unlink_urb(ushc->cbw_urb); usb_unlink_urb(ushc->data_urb); req->cmd->error = ret; mmc_request_done(mmc, req); } } static int ushc_set_power(struct ushc_data *ushc, unsigned char power_mode) { u16 voltage; switch (power_mode) { case MMC_POWER_OFF: voltage = USHC_PWR_CTRL_OFF; break; case MMC_POWER_UP: case MMC_POWER_ON: voltage = USHC_PWR_CTRL_3V3; break; default: return -EINVAL; } return usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_PWR_CTRL, USHC_PWR_CTRL_TYPE, voltage, 0, NULL, 0, 100); } static int ushc_set_bus_width(struct ushc_data *ushc, int bus_width) { return ushc_hw_set_host_ctrl(ushc, USHC_HOST_CTRL_4BIT, bus_width == 4 ? USHC_HOST_CTRL_4BIT : 0); } static int ushc_set_bus_freq(struct ushc_data *ushc, int clk, bool enable_hs) { int ret; /* Hardware can't detect interrupts while the clock is off. */ if (clk == 0) clk = 400000; ret = ushc_hw_set_host_ctrl(ushc, USHC_HOST_CTRL_HIGH_SPD, enable_hs ? USHC_HOST_CTRL_HIGH_SPD : 0); if (ret < 0) return ret; ret = usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_CLK_FREQ, USHC_CLK_FREQ_TYPE, clk & 0xffff, (clk >> 16) & 0xffff, NULL, 0, 100); if (ret < 0) return ret; ushc->clock_freq = clk; return 0; } static void ushc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct ushc_data *ushc = mmc_priv(mmc); ushc_set_power(ushc, ios->power_mode); ushc_set_bus_width(ushc, 1 << ios->bus_width); ushc_set_bus_freq(ushc, ios->clock, ios->timing == MMC_TIMING_SD_HS); } static int ushc_get_cd(struct mmc_host *mmc) { struct ushc_data *ushc = mmc_priv(mmc); return !!(ushc->last_status & USHC_INT_STATUS_CARD_PRESENT); } static void ushc_enable_sdio_irq(struct mmc_host *mmc, int enable) { struct ushc_data *ushc = mmc_priv(mmc); if (enable) set_bit(INT_EN, &ushc->flags); else clear_bit(INT_EN, &ushc->flags); } static void ushc_clean_up(struct ushc_data *ushc) { usb_free_urb(ushc->int_urb); usb_free_urb(ushc->csw_urb); usb_free_urb(ushc->data_urb); usb_free_urb(ushc->cbw_urb); kfree(ushc->int_data); kfree(ushc->cbw); kfree(ushc->csw); mmc_free_host(ushc->mmc); } static const struct mmc_host_ops ushc_ops = { .request = ushc_request, .set_ios = ushc_set_ios, .get_cd = ushc_get_cd, .enable_sdio_irq = ushc_enable_sdio_irq, }; static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(intf); struct mmc_host *mmc; struct ushc_data *ushc; int ret; if (intf->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; ushc = mmc_priv(mmc); usb_set_intfdata(intf, ushc); ushc->usb_dev = usb_dev; ushc->mmc = mmc; spin_lock_init(&ushc->lock); ret = ushc_hw_reset(ushc); if (ret < 0) goto err; /* Read capabilities. */ ret = ushc_hw_get_caps(ushc); if (ret < 0) goto err; mmc->ops = &ushc_ops; mmc->f_min = 400000; mmc->f_max = 50000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; mmc->caps |= (ushc->caps & USHC_GET_CAPS_HIGH_SPD) ? 
MMC_CAP_SD_HIGHSPEED : 0; mmc->max_seg_size = 512*511; mmc->max_segs = 1; mmc->max_req_size = 512*511; mmc->max_blk_size = 512; mmc->max_blk_count = 511; ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->int_urb == NULL) { ret = -ENOMEM; goto err; } ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); if (ushc->int_data == NULL) { ret = -ENOMEM; goto err; } usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, usb_rcvintpipe(usb_dev, intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), ushc->int_data, sizeof(struct ushc_int_data), int_callback, ushc, intf->cur_altsetting->endpoint[0].desc.bInterval); ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->cbw_urb == NULL) { ret = -ENOMEM; goto err; } ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); if (ushc->cbw == NULL) { ret = -ENOMEM; goto err; } ushc->cbw->signature = USHC_CBW_SIGNATURE; usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), ushc->cbw, sizeof(struct ushc_cbw), cbw_callback, ushc); ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->data_urb == NULL) { ret = -ENOMEM; goto err; } ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->csw_urb == NULL) { ret = -ENOMEM; goto err; } ushc->csw = kzalloc(sizeof(struct ushc_csw), GFP_KERNEL); if (ushc->csw == NULL) { ret = -ENOMEM; goto err; } usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), ushc->csw, sizeof(struct ushc_csw), csw_callback, ushc); ret = mmc_add_host(ushc->mmc); if (ret) goto err; ret = usb_submit_urb(ushc->int_urb, GFP_KERNEL); if (ret < 0) { mmc_remove_host(ushc->mmc); goto err; } return 0; err: ushc_clean_up(ushc); return ret; } static void ushc_disconnect(struct usb_interface *intf) { struct ushc_data *ushc = usb_get_intfdata(intf); spin_lock_irq(&ushc->lock); set_bit(DISCONNECTED, &ushc->flags); spin_unlock_irq(&ushc->lock); usb_kill_urb(ushc->int_urb); usb_kill_urb(ushc->cbw_urb); usb_kill_urb(ushc->data_urb); usb_kill_urb(ushc->csw_urb); mmc_remove_host(ushc->mmc); ushc_clean_up(ushc); } static struct usb_device_id ushc_id_table[] = { /* CSR USB SD Host Controller */ { USB_DEVICE(0x0a12, 0x5d10) }, { }, }; MODULE_DEVICE_TABLE(usb, ushc_id_table); static struct usb_driver ushc_driver = { .name = "ushc", .id_table = ushc_id_table, .probe = ushc_probe, .disconnect = ushc_disconnect, }; module_usb_driver(ushc_driver); MODULE_DESCRIPTION("USB SD Host Controller driver"); MODULE_AUTHOR("David Vrabel <david.vrabel@csr.com>"); MODULE_LICENSE("GPL");
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/key/af_key.c   An implementation of PF_KEYv2 sockets.
 *
 * Authors: Maxim Giryaev <gem@asplinux.ru>
 *          David S. Miller <davem@redhat.com>
 *          Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *          Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *          Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org>
 *          Derek Atkins <derek@ihtfp.com>
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/xfrm.h>
#include <net/sock.h>

#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))

static unsigned int pfkey_net_id __read_mostly;

struct netns_pfkey {
	/* List of all pfkey sockets.
*/ struct hlist_head table; atomic_t socks_nr; }; static DEFINE_MUTEX(pfkey_mutex); #define DUMMY_MARK 0 static const struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; int registered; int promisc; struct { uint8_t msg_version; uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; struct sk_buff *skb; } dump; struct mutex dump_lock; }; static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family); static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; } static int pfkey_can_dump(const struct sock *sk) { if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) return 1; return 0; } static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { if (pfk->dump.skb) { kfree_skb(pfk->dump.skb); pfk->dump.skb = NULL; } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; } } static void pfkey_sock_destruct(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive pfkey socket: %p\n", sk); return; } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); mutex_lock(&pfkey_mutex); sk_add_node_rcu(sk, &net_pfkey->table); mutex_unlock(&pfkey_mutex); } static void pfkey_remove(struct sock *sk) { mutex_lock(&pfkey_mutex); sk_del_node_init_rcu(sk); mutex_unlock(&pfkey_mutex); } static struct proto key_proto = { .name = "KEY", .owner = THIS_MODULE, .obj_size = sizeof(struct pfkey_sock), }; static int pfkey_create(struct net *net, struct socket *sock, int protocol, int kern) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct pfkey_sock *pfk; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; if (protocol != PF_KEY_V2) return -EPROTONOSUPPORT; sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) return -ENOMEM; pfk = pfkey_sk(sk); mutex_init(&pfk->dump_lock); sock->ops = &pfkey_ops; sock_init_data(sock, sk); sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct; atomic_inc(&net_pfkey->socks_nr); pfkey_insert(sk); return 0; } static int pfkey_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return 0; pfkey_remove(sk); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_write_queue); synchronize_rcu(); sock_put(sk); return 0; } static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) return err; skb = skb_clone(skb, allocation); if (skb) { skb_set_owner_r(skb, sk); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); err = 0; } return err; } /* Send SKB to all pfkey sockets matching selected criteria. 
*/ #define BROADCAST_ALL 0 #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; int err = -ESRCH; /* XXX Do we need something like netlink_overrun? I think * XXX PF_KEY socket apps will not mind current behavior. */ if (!skb) return -ENOMEM; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; /* Yes, it means that if you are meant to receive this * pfkey message you receive it twice as promiscuous * socket. */ if (pfk->promisc) pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) continue; if (broadcast_flags != BROADCAST_ALL) { if (broadcast_flags & BROADCAST_PROMISC_ONLY) continue; if ((broadcast_flags & BROADCAST_REGISTERED) && !pfk->registered) continue; if (broadcast_flags & BROADCAST_ONE) continue; } err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* Error is cleared after successful sending to at least one * registered KM */ if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; } rcu_read_unlock(); if (one_sk != NULL) err = pfkey_broadcast_one(skb, allocation, one_sk); kfree_skb(skb); return err; } static int pfkey_do_dump(struct pfkey_sock *pfk) { struct sadb_msg *hdr; int rc; mutex_lock(&pfk->dump_lock); if (!pfk->dump.dump) { rc = 0; goto out; } rc = pfk->dump.dump(pfk); if (rc == -ENOBUFS) { rc = 0; goto out; } if (pfk->dump.skb) { if (!pfkey_can_dump(&pfk->sk)) { rc = 0; goto out; } hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } pfkey_terminate_dump(pfk); out: mutex_unlock(&pfk->dump_lock); return rc; } static inline void pfkey_hdr_dup(struct sadb_msg *new, const struct sadb_msg *orig) { *new = *orig; } static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) { struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); struct sadb_msg *hdr; if (!skb) return -ENOBUFS; /* Woe be to the platform trying to support PFKEY yet * having normal errnos outside the 1-255 range, inclusive. 
*/ err = -err; if (err == ERESTARTSYS || err == ERESTARTNOHAND || err == ERESTARTNOINTR) err = EINTR; if (err >= 512) err = EINVAL; BUG_ON(err <= 0 || err >= 256); hdr = skb_put(skb, sizeof(struct sadb_msg)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = (uint8_t) err; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static const u8 sadb_ext_min_len[] = { [SADB_EXT_RESERVED] = (u8) 0, [SADB_EXT_SA] = (u8) sizeof(struct sadb_sa), [SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_LIFETIME_HARD] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_LIFETIME_SOFT] = (u8) sizeof(struct sadb_lifetime), [SADB_EXT_ADDRESS_SRC] = (u8) sizeof(struct sadb_address), [SADB_EXT_ADDRESS_DST] = (u8) sizeof(struct sadb_address), [SADB_EXT_ADDRESS_PROXY] = (u8) sizeof(struct sadb_address), [SADB_EXT_KEY_AUTH] = (u8) sizeof(struct sadb_key), [SADB_EXT_KEY_ENCRYPT] = (u8) sizeof(struct sadb_key), [SADB_EXT_IDENTITY_SRC] = (u8) sizeof(struct sadb_ident), [SADB_EXT_IDENTITY_DST] = (u8) sizeof(struct sadb_ident), [SADB_EXT_SENSITIVITY] = (u8) sizeof(struct sadb_sens), [SADB_EXT_PROPOSAL] = (u8) sizeof(struct sadb_prop), [SADB_EXT_SUPPORTED_AUTH] = (u8) sizeof(struct sadb_supported), [SADB_EXT_SUPPORTED_ENCRYPT] = (u8) sizeof(struct sadb_supported), [SADB_EXT_SPIRANGE] = (u8) sizeof(struct sadb_spirange), [SADB_X_EXT_KMPRIVATE] = (u8) sizeof(struct sadb_x_kmprivate), [SADB_X_EXT_POLICY] = (u8) sizeof(struct sadb_x_policy), [SADB_X_EXT_SA2] = (u8) sizeof(struct sadb_x_sa2), [SADB_X_EXT_NAT_T_TYPE] = (u8) sizeof(struct sadb_x_nat_t_type), [SADB_X_EXT_NAT_T_SPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), [SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ static int verify_address_len(const void *p) { const struct sadb_address *sp = p; const struct sockaddr *addr = (const struct sockaddr *)(sp + 1); const struct sockaddr_in *sin; #if IS_ENABLED(CONFIG_IPV6) const struct sockaddr_in6 *sin6; #endif int len; if (sp->sadb_address_len < DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family), sizeof(uint64_t))) return -EINVAL; switch (addr->sa_family) { case AF_INET: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 32) return -EINVAL; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 128) return -EINVAL; break; #endif default: /* It is user using kernel to keep track of security * associations for another protocol, such as * OSPF/RSVP/RIPV2/MIP. It is user's job to verify * lengths. * * XXX Actually, association/policy database is not yet * XXX able to cope with arbitrary sockaddr families. * XXX When it can, remove this -EINVAL. 
-DaveM */ return -EINVAL; } return 0; } static inline int sadb_key_len(const struct sadb_key *key) { int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8); return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes, sizeof(uint64_t)); } static int verify_key_len(const void *p) { const struct sadb_key *key = p; if (sadb_key_len(key) > key->sadb_key_len) return -EINVAL; return 0; } static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len, sizeof(uint64_t)); } static inline int verify_sec_ctx_len(const void *p) { const struct sadb_x_sec_ctx *sec_ctx = p; int len = sec_ctx->sadb_x_ctx_len; if (len > PAGE_SIZE) return -EINVAL; len = pfkey_sec_ctx_len(sec_ctx); if (sec_ctx->sadb_x_sec_len != len) return -EINVAL; return 0; } static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; uctx->len = pfkey_sec_ctx_len(sec_ctx); uctx->exttype = sec_ctx->sadb_x_sec_exttype; uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; uctx->ctx_len = sec_ctx->sadb_x_ctx_len; memcpy(uctx + 1, sec_ctx + 1, uctx->ctx_len); return uctx; } static int present_and_same_family(const struct sadb_address *src, const struct sadb_address *dst) { const struct sockaddr *s_addr, *d_addr; if (!src || !dst) return 0; s_addr = (const struct sockaddr *)(src + 1); d_addr = (const struct sockaddr *)(dst + 1); if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET #if IS_ENABLED(CONFIG_IPV6) && s_addr->sa_family != AF_INET6 #endif ) return 0; return 1; } static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs) { const char *p = (char *) hdr; int len = skb->len; len -= sizeof(*hdr); p += sizeof(*hdr); while (len > 0) { const struct sadb_ext *ehdr = (const struct sadb_ext *) p; uint16_t ext_type; int ext_len; if (len < sizeof(*ehdr)) return -EINVAL; ext_len = ehdr->sadb_ext_len; ext_len *= sizeof(uint64_t); ext_type = ehdr->sadb_ext_type; if (ext_len < sizeof(uint64_t) || ext_len > len || ext_type == SADB_EXT_RESERVED) return -EINVAL; if (ext_type <= SADB_EXT_MAX) { int min = (int) sadb_ext_min_len[ext_type]; if (ext_len < min) return -EINVAL; if (ext_hdrs[ext_type-1] != NULL) return -EINVAL; switch (ext_type) { case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: case SADB_X_EXT_NAT_T_OA: if (verify_address_len(p)) return -EINVAL; break; case SADB_X_EXT_SEC_CTX: if (verify_sec_ctx_len(p)) return -EINVAL; break; case SADB_EXT_KEY_AUTH: case SADB_EXT_KEY_ENCRYPT: if (verify_key_len(p)) return -EINVAL; break; default: break; } ext_hdrs[ext_type-1] = (void *) p; } p += ext_len; len -= ext_len; } return 0; } static uint16_t pfkey_satype2proto(uint8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: return IPSEC_PROTO_ANY; case SADB_SATYPE_AH: return IPPROTO_AH; case SADB_SATYPE_ESP: return IPPROTO_ESP; case SADB_X_SATYPE_IPCOMP: return IPPROTO_COMP; default: return 0; } /* NOTREACHED */ } static uint8_t pfkey_proto2satype(uint16_t proto) { switch (proto) { case IPPROTO_AH: return SADB_SATYPE_AH; case IPPROTO_ESP: return SADB_SATYPE_ESP; case IPPROTO_COMP: return SADB_X_SATYPE_IPCOMP; default: return 0; } /* NOTREACHED */ } /* BTW, this scheme means that there is no way with PFKEY2 sockets to * say specifically 'just 
raw sockets' as we encode them as 255. */ static uint8_t pfkey_proto_to_xfrm(uint8_t proto) { return proto == IPSEC_PROTO_ANY ? 0 : proto; } static uint8_t pfkey_proto_from_xfrm(uint8_t proto) { return proto ? proto : IPSEC_PROTO_ANY; } static inline int pfkey_sockaddr_len(sa_family_t family) { switch (family) { case AF_INET: return sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return sizeof(struct sockaddr_in6); #endif } return 0; } static int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) { switch (sa->sa_family) { case AF_INET: xaddr->a4 = ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(xaddr->a6, &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); return AF_INET6; #endif } return 0; } static int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr) { return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), xaddr); } static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { const struct sadb_sa *sa; const struct sadb_address *addr; uint16_t proto; unsigned short family; xfrm_address_t *xaddr; sa = ext_hdrs[SADB_EXT_SA - 1]; if (sa == NULL) return NULL; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return NULL; /* sadb_address_len should be checked by caller */ addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; if (addr == NULL) return NULL; family = ((const struct sockaddr *)(addr + 1))->sa_family; switch (family) { case AF_INET: xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; #endif default: xaddr = NULL; } if (!xaddr) return NULL; return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) static int pfkey_sockaddr_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family)); } static inline int pfkey_mode_from_xfrm(int mode) { switch(mode) { case XFRM_MODE_TRANSPORT: return IPSEC_MODE_TRANSPORT; case XFRM_MODE_TUNNEL: return IPSEC_MODE_TUNNEL; case XFRM_MODE_BEET: return IPSEC_MODE_BEET; default: return -1; } } static inline int pfkey_mode_to_xfrm(int mode) { switch(mode) { case IPSEC_MODE_ANY: /*XXX*/ case IPSEC_MODE_TRANSPORT: return XFRM_MODE_TRANSPORT; case IPSEC_MODE_TUNNEL: return XFRM_MODE_TUNNEL; case IPSEC_MODE_BEET: return XFRM_MODE_BEET; default: return -1; } } static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port, struct sockaddr *sa, unsigned short family) { switch (family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *)sa; sin->sin_family = AF_INET; sin->sin_port = port; sin->sin_addr.s_addr = xaddr->a4; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); return 32; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; sin6->sin6_family = AF_INET6; sin6->sin6_port = port; sin6->sin6_flowinfo = 0; sin6->sin6_addr = xaddr->in6; sin6->sin6_scope_id = 0; return 128; } #endif } return 0; } static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_lifetime *lifetime; struct sadb_address *addr; struct sadb_key *key; struct sadb_x_sa2 *sa2; struct sadb_x_sec_ctx *sec_ctx; struct 
xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int size; int auth_key_size = 0; int encrypt_key_size = 0; int sockaddr_size; struct xfrm_encap_tmpl *natt = NULL; int mode; /* address family check */ sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return ERR_PTR(-EINVAL); /* base, SA, (lifetime (HSC),) address(SD), (address(P),) key(AE), (identity(SD),) (sensitivity)> */ size = sizeof(struct sadb_msg) +sizeof(struct sadb_sa) + sizeof(struct sadb_lifetime) + ((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) + ((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) + sizeof(struct sadb_address)*2 + sockaddr_size*2 + sizeof(struct sadb_x_sa2); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } /* identity & sensitivity */ if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) size += sizeof(struct sadb_address) + sockaddr_size; if (add_keys) { if (x->aalg && x->aalg->alg_key_len) { auth_key_size = PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8); size += sizeof(struct sadb_key) + auth_key_size; } if (x->ealg && x->ealg->alg_key_len) { encrypt_key_size = PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8); size += sizeof(struct sadb_key) + encrypt_key_size; } } if (x->encap) natt = x->encap; if (natt && natt->encap_type) { size += sizeof(struct sadb_x_nat_t_type); size += sizeof(struct sadb_x_nat_t_port); size += sizeof(struct sadb_x_nat_t_port); } skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); /* call should fill header later */ hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ hdr->sadb_msg_len = size / sizeof(uint64_t); /* sa */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = x->props.replay_window; switch (x->km.state) { case XFRM_STATE_VALID: sa->sadb_sa_state = x->km.dying ? SADB_SASTATE_DYING : SADB_SASTATE_MATURE; break; case XFRM_STATE_ACQ: sa->sadb_sa_state = SADB_SASTATE_LARVAL; break; default: sa->sadb_sa_state = SADB_SASTATE_DEAD; break; } sa->sadb_sa_auth = 0; if (x->aalg) { struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0); sa->sadb_sa_auth = (a && a->pfkey_supported) ? a->desc.sadb_alg_id : 0; } sa->sadb_sa_encrypt = 0; BUG_ON(x->ealg && x->calg); if (x->ealg) { struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0); sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? a->desc.sadb_alg_id : 0; } /* KAME compatible: sadb_sa_encrypt is overloaded with calg id */ if (x->calg) { struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0); sa->sadb_sa_encrypt = (a && a->pfkey_supported) ? 
a->desc.sadb_alg_id : 0; } sa->sadb_sa_flags = 0; if (x->props.flags & XFRM_STATE_NOECN) sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; if (x->props.flags & XFRM_STATE_DECAP_DSCP) sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; if (x->props.flags & XFRM_STATE_NOPMTUDISC) sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC; /* hard time */ if (hsc & 2) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds; } /* soft time */ if (hsc & 1) { lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(x->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds; } /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = x->curlft.packets; lifetime->sadb_lifetime_bytes = x->curlft.bytes; lifetime->sadb_lifetime_addtime = x->curlft.add_time; lifetime->sadb_lifetime_usetime = x->curlft.use_time; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; /* "if the ports are non-zero, then the sadb_address_proto field, normally zero, MUST be filled in with the transport protocol's number." 
- RFC2367 */ addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); BUG_ON(!addr->sadb_address_prefixlen); if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family)) { addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY; addr->sadb_address_proto = pfkey_proto_from_xfrm(x->sel.proto); addr->sadb_address_prefixlen = x->sel.prefixlen_s; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport, (struct sockaddr *) (addr + 1), x->props.family); } /* auth key */ if (add_keys && auth_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_AUTH; key->sadb_key_bits = x->aalg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8); } /* encrypt key */ if (add_keys && encrypt_key_size) { key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size); key->sadb_key_len = (sizeof(struct sadb_key) + encrypt_key_size) / sizeof(uint64_t); key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT; key->sadb_key_bits = x->ealg->alg_key_len; key->sadb_key_reserved = 0; memcpy(key + 1, x->ealg->alg_key, (x->ealg->alg_key_len+7)/8); } /* sa */ sa2 = skb_put(skb, sizeof(struct sadb_x_sa2)); sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t); sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2; if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) { kfree_skb(skb); return ERR_PTR(-EINVAL); } sa2->sadb_x_sa2_mode = mode; sa2->sadb_x_sa2_reserved1 = 0; sa2->sadb_x_sa2_reserved2 = 0; sa2->sadb_x_sa2_sequence = 0; sa2->sadb_x_sa2_reqid = x->props.reqid; if (natt && natt->encap_type) { struct sadb_x_nat_t_type *n_type; struct sadb_x_nat_t_port *n_port; /* type */ n_type = skb_put(skb, sizeof(*n_type)); n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t); n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE; n_type->sadb_x_nat_t_type_type = natt->encap_type; n_type->sadb_x_nat_t_type_reserved[0] = 0; n_type->sadb_x_nat_t_type_reserved[1] = 0; n_type->sadb_x_nat_t_type_reserved[2] = 0; /* source port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* dest port */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = natt->encap_dport; n_port->sadb_x_nat_t_port_reserved = 0; } /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct 
sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x) { struct sk_buff *skb; skb = __pfkey_xfrm_state2msg(x, 1, 3); return skb; } static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x, int hsc) { return __pfkey_xfrm_state2msg(x, 0, hsc); } static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct xfrm_state *x; const struct sadb_lifetime *lifetime; const struct sadb_sa *sa; const struct sadb_key *key; const struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; sa = ext_hdrs[SADB_EXT_SA - 1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_ESP && !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]) return ERR_PTR(-EINVAL); if (hdr->sadb_msg_satype == SADB_SATYPE_AH && !ext_hdrs[SADB_EXT_KEY_AUTH-1]) return ERR_PTR(-EINVAL); if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] != !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) return ERR_PTR(-EINVAL); proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return ERR_PTR(-EINVAL); /* default error is no buffer space */ err = -ENOBUFS; /* RFC2367: Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message. SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state. Therefore, the sadb_sa_state field of all submitted SAs MUST be SADB_SASTATE_MATURE and the kernel MUST return an error if this is not true. However, KAME setkey always uses SADB_SASTATE_LARVAL. Hence, we have to _ignore_ sadb_sa_state, which is also reasonable. 
*/ if (sa->sadb_sa_auth > SADB_AALG_MAX || (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP && sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key != NULL && sa->sadb_sa_encrypt != SADB_EALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) x->props.flags |= XFRM_STATE_DECAP_DSCP; if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; err = security_xfrm_state_alloc(x, uctx); kfree(uctx); if (err) goto out; } err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); if (!x->aalg) { err = -ENOMEM; goto out; } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { x->aalg->alg_key_len = key->sadb_key_bits; memcpy(x->aalg->alg_key, key+1, keysize); } x->aalg->alg_trunc_len = a->uinfo.auth.icv_truncbits; x->props.aalgo = sa->sadb_sa_auth; /* x->algo.flags = sa->sadb_sa_flags; */ } if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); if (!x->calg) { err = -ENOMEM; goto out; } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); if (!x->ealg) { err = -ENOMEM; goto out; } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { x->ealg->alg_key_len = key->sadb_key_bits; memcpy(x->ealg->alg_key, key+1, keysize); } x->props.ealgo = sa->sadb_sa_encrypt; x->geniv = a->uinfo.encr.geniv; } } /* 
x->algo.flags = sa->sadb_sa_flags; */ x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], &x->props.saddr); pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], &x->id.daddr); if (ext_hdrs[SADB_X_EXT_SA2-1]) { const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1]; int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) { err = -EINVAL; goto out; } x->props.mode = mode; x->props.reqid = sa2->sadb_x_sa2_reqid; } if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; /* Nobody uses this, but we try. */ x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); x->sel.prefixlen_s = addr->sadb_address_prefixlen; } if (!x->sel.family) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; natt->encap_type = n_type->sadb_x_nat_t_type_type; if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; natt->encap_sport = n_port->sadb_x_nat_t_port_port; } if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } } err = xfrm_init_state(x); if (err) goto out; x->km.seq = hdr->sadb_msg_seq; return x; out: x->km.state = XFRM_STATE_DEAD; xfrm_state_put(x); return ERR_PTR(err); } static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -EOPNOTSUPP; } static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct sk_buff *resp_skb; struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; struct sadb_msg *out_hdr; struct sadb_spirange *range; struct xfrm_state *x = NULL; int mode; int err; u32 min_spi, max_spi; u32 reqid; u8 proto; unsigned short family; xfrm_address_t *xsaddr = NULL, *xdaddr = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) { mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) return -EINVAL; reqid = sa2->sadb_x_sa2_reqid; } else { mode = 0; reqid = 0; } saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; family = ((struct sockaddr *)(saddr + 1))->sa_family; switch (family) { case AF_INET: xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr; xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr; break; #endif } if (hdr->sadb_msg_seq) { x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; } } if (!x) x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; min_spi = 0x100; max_spi = 0x0fffffff; range = 
ext_hdrs[SADB_EXT_SPIRANGE-1]; if (range) { min_spi = range->sadb_spirange_min; max_spi = range->sadb_spirange_max; } err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL); if (err) { xfrm_state_put(x); return err; } err = xfrm_alloc_spi(x, min_spi, max_spi, NULL); resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x); if (IS_ERR(resp_skb)) { xfrm_state_put(x); return PTR_ERR(resp_skb); } out_hdr = (struct sadb_msg *) resp_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GETSPI; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; xfrm_state_put(x); pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8) return -EOPNOTSUPP; if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x == NULL) return 0; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) x->km.state = XFRM_STATE_ERROR; spin_unlock_bh(&x->lock); xfrm_state_put(x); return 0; } static inline int event2poltype(int event) { switch (event) { case XFRM_MSG_DELPOLICY: return SADB_X_SPDDELETE; case XFRM_MSG_NEWPOLICY: return SADB_X_SPDADD; case XFRM_MSG_UPDPOLICY: return SADB_X_SPDUPDATE; case XFRM_MSG_POLEXPIRE: // return SADB_X_SPDEXPIRE; default: pr_err("pfkey: Unknown policy event %d\n", event); break; } return 0; } static inline int event2keytype(int event) { switch (event) { case XFRM_MSG_DELSA: return SADB_DELETE; case XFRM_MSG_NEWSA: return SADB_ADD; case XFRM_MSG_UPDSA: return SADB_UPDATE; case XFRM_MSG_EXPIRE: return SADB_EXPIRE; default: pr_err("pfkey: Unknown SA event %d\n", event); break; } return 0; } /* ADD/UPD/DEL */ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = pfkey_xfrm_state2msg(x); if (IS_ERR(skb)) return PTR_ERR(skb); hdr = (struct sadb_msg *) skb->data; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = event2keytype(c->event); hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); xfrm_state_hold(x); if (hdr->sadb_msg_type == SADB_ADD) err = xfrm_state_add(x); else err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 
0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); goto out; } if (hdr->sadb_msg_type == SADB_ADD) c.event = XFRM_MSG_NEWSA; else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); return err; } static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; if ((err = security_xfrm_state_delete(x))) goto out; if (xfrm_state_kern(x)) { err = -EPERM; goto out; } err = xfrm_state_delete(x); if (err < 0) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; struct xfrm_state *x; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GET; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; int len, auth_len, enc_len, i; auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); } len = enc_len + auth_len + sizeof(struct sadb_msg); skb = alloc_skb(len + 16, allocation); if (!skb) goto out_put_algs; hdr = skb_put(skb, sizeof(*hdr)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = 0; hdr->sadb_msg_len = len / sizeof(uint64_t); if (auth_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, auth_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = auth_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; for (i = 0; ; i++) { struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg->available) *ap++ = aalg->desc; } } if (enc_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, enc_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = enc_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; for (i = 0; ; i++) { struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if 
(ealg->available) *ap++ = ealg->desc; } } out_put_algs: return skb; } static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; if (hdr->sadb_msg_satype > SADB_SATYPE_MAX) return -EINVAL; if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) { if (pfk->registered&(1<<hdr->sadb_msg_satype)) return -EEXIST; pfk->registered |= (1<<hdr->sadb_msg_satype); } mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); mutex_unlock(&pfkey_mutex); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); return -ENOBUFS; } pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg)); hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int proto; struct km_event c; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; err = xfrm_state_flush(net, proto, true, false); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ err = 0; return err ? 
err : err2; } c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); return 0; } static int dump_sa(struct xfrm_state *x, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_DUMP; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sa(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); return -ENOMEM; } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr, sizeof(xfrm_address_t)); filter->family = xfilter->sadb_x_filter_family; filter->splen = xfilter->sadb_x_filter_splen; filter->dplen = xfilter->sadb_x_filter_dplen; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } if (reset_errno && skb_cloned(skb)) skb = skb_copy(skb, GFP_KERNEL); else skb = skb_clone(skb, GFP_KERNEL); if (reset_errno && skb) { struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; new_hdr->sadb_msg_errno = 0; } pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) { int i; u32 reqid = *(u32*)ptr; for (i=0; i<xp->xfrm_nr; i++) { if (xp->xfrm_vec[i].reqid == reqid) return -EEXIST; } return 0; } static u32 
gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; do { ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; } static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol, struct sadb_x_ipsecrequest *rq) { struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) return -ELOOP; if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) return -EINVAL; t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) { if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) && pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND) return -EINVAL; t->optional = 1; } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { int err; err = parse_sockaddr_pair( (struct sockaddr *)(rq + 1), rq->sadb_x_ipsecrequest_len - sizeof(*rq), &t->saddr, &t->id.daddr, &t->encap_family); if (err) return err; } else t->encap_family = xp->family; /* No way to set this via kame pfkey */ t->allalgs = 1; xp->xfrm_nr++; return 0; } static int parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) { int err; int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; while (len >= sizeof(*rq)) { if (len < rq->sadb_x_ipsecrequest_len || rq->sadb_x_ipsecrequest_len < sizeof(*rq)) return -EINVAL; if ((err = parse_ipsecrequest(xp, pol, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len); } return 0; } static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; if (xfrm_ctx) { int len = sizeof(struct sadb_x_sec_ctx); len += xfrm_ctx->ctx_len; return PFKEY_ALIGN8(len); } return 0; } static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + (sizeof(struct sadb_lifetime) * 3) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) + (socklen * 2) + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; size = pfkey_xfrm_policy2msg_size(xp); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); return skb; } static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_lifetime *lifetime; struct 
sadb_x_policy *pol; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int i; int size; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = pfkey_sockaddr_len(xp->family); size = pfkey_xfrm_policy2msg_size(xp); /* call should fill header later */ hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_s; addr->sadb_address_reserved = 0; if (!pfkey_sockaddr_fill(&xp->selector.saddr, xp->selector.sport, (struct sockaddr *) (addr + 1), xp->family)) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_d; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport, (struct sockaddr *) (addr + 1), xp->family); /* hard time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds; /* soft time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds; /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = xp->curlft.packets; lifetime->sadb_lifetime_bytes = xp->curlft.bytes; lifetime->sadb_lifetime_addtime = xp->curlft.add_time; lifetime->sadb_lifetime_usetime = xp->curlft.use_time; pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD; if (xp->action == XFRM_POLICY_ALLOW) { if (xp->xfrm_nr) pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; else pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; for (i=0; i<xp->xfrm_nr; i++) { const struct xfrm_tmpl *t = xp->xfrm_vec + i; struct sadb_x_ipsecrequest *rq; int req_size; int mode; req_size = sizeof(struct sadb_x_ipsecrequest); if (t->mode == XFRM_MODE_TUNNEL) { socklen = pfkey_sockaddr_len(t->encap_family); req_size += socklen * 2; } 
else { size -= 2*socklen; } rq = skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); rq->sadb_x_ipsecrequest_len = req_size; rq->sadb_x_ipsecrequest_proto = t->id.proto; if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0) return -EINVAL; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE; if (t->reqid) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE; if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; if (t->mode == XFRM_MODE_TUNNEL) { u8 *sa = (void *)(rq + 1); pfkey_sockaddr_fill(&t->saddr, 0, (struct sockaddr *)sa, t->encap_family); pfkey_sockaddr_fill(&t->id.daddr, 0, (struct sockaddr *) (sa + socklen), t->encap_family); } } /* security context */ if ((xfrm_ctx = xp->security)) { int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); sec_ctx = skb_put(skb, ctx_size); sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = refcount_read(&xp->refcnt); return 0; } static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; if (c->data.byid && c->event == XFRM_MSG_DELPOLICY) out_hdr->sadb_msg_type = SADB_X_SPDDELETE2; else out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC) return -EINVAL; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); xp->selector.family = xp->family; xp->selector.prefixlen_s = sa->sadb_address_prefixlen; xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; /* Amusing, we set this twice. 
KAME apps appear to set same value * in both addresses. */ xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) xp->selector.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; goto out; } err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) goto out; } xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) { xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) { xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (err = parse_ipsecrequests(xp, pol)) < 0) goto out; err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; else c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); return 0; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct xfrm_selector sel; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *pol_ctx = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; memset(&sel, 0, sizeof(sel)); sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, 
pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); if (xp == NULL) return -ENOENT; xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); out: xfrm_pol_put(xp); return err; } static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir) { int err; struct sk_buff *out_skb; struct sadb_msg *out_hdr; err = 0; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) { err = PTR_ERR(out_skb); goto out; } err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); goto out; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = hdr->sadb_msg_type; out_hdr->sadb_msg_satype = 0; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: return err; } static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { int af, socklen; if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; *family = af; return 0; } #ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { int err; struct sadb_x_ipsecrequest *rq2; int mode; if (len < sizeof(*rq1) || len < rq1->sadb_x_ipsecrequest_len || rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; if (len <= sizeof(*rq2) || len < rq2->sadb_x_ipsecrequest_len || rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto || rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode || rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid) return -EINVAL; m->proto = rq1->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; m->mode = mode; m->reqid = rq1->sadb_x_ipsecrequest_reqid; return ((int)(rq1->sadb_x_ipsecrequest_len + rq2->sadb_x_ipsecrequest_len)); } static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress k; struct net *net = sock_net(sk); if
(!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } if (kma) { /* convert sadb_x_kmaddress to xfrm_kmaddress */ k.reserved = kma->sadb_x_kmaddress_reserved; ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), &k.local, &k.remote, &k.family); if (ret < 0) { err = ret; goto out; } } dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); /* set source address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); /* set destination address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); rq = (struct sadb_x_ipsecrequest *)(pol + 1); /* extract ipsecrequests */ i = 0; len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); while (len > 0 && i < XFRM_MAX_DEPTH) { ret = ipsecrequests_to_migrate(rq, len, &m[i]); if (ret < 0) { err = ret; goto out; } else { rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); len -= ret; i++; } } if (!i || len > 0) { err = -EINVAL; goto out; } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, kma ? &k : NULL, net, NULL, 0, NULL); out: return err; } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } #endif static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); if (dir >= XFRM_POLICY_MAX) return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; if (delete) { xfrm_audit_policy_delete(xp, err ? 
0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); } else { err = key_pol_get_resp(sk, xp, hdr, dir); } out: xfrm_pol_put(xp); return err; } static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sp(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { struct net *net = sock_net((struct sock *)pfk); xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb_out) return -ENOBUFS; hdr = skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ return 0; return err; } c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); return 0; } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs); static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, [SADB_UPDATE] = pfkey_add, [SADB_ADD] = pfkey_add, [SADB_DELETE] = pfkey_delete, [SADB_GET] = pfkey_get, [SADB_ACQUIRE] = pfkey_acquire, 
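/* Editor's note: slots left NULL in this table (SADB_EXPIRE, SADB_X_PCHANGE,
 * SADB_X_SPDACQUIRE) are notification types normally originated by the kernel
 * rather than received from userspace; pfkey_process() below answers an
 * incoming message whose slot is NULL with -EOPNOTSUPP.
 */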
[SADB_REGISTER] = pfkey_register, [SADB_EXPIRE] = NULL, [SADB_FLUSH] = pfkey_flush, [SADB_DUMP] = pfkey_dump, [SADB_X_PROMISC] = pfkey_promisc, [SADB_X_PCHANGE] = NULL, [SADB_X_SPDUPDATE] = pfkey_spdadd, [SADB_X_SPDADD] = pfkey_spdadd, [SADB_X_SPDDELETE] = pfkey_spddelete, [SADB_X_SPDGET] = pfkey_spdget, [SADB_X_SPDACQUIRE] = NULL, [SADB_X_SPDDUMP] = pfkey_spddump, [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; /* Non-zero return value of pfkey_broadcast() does not always signal * an error and even on an actual error we may still want to process * the message so rather ignore the return value. */ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); if (!err) { err = -EOPNOTSUPP; if (pfkey_funcs[hdr->sadb_msg_type]) err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs); } return err; } static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) { struct sadb_msg *hdr = NULL; if (skb->len < sizeof(*hdr)) { *errp = -EMSGSIZE; } else { hdr = (struct sadb_msg *) skb->data; if (hdr->sadb_msg_version != PF_KEY_V2 || hdr->sadb_msg_reserved != 0 || (hdr->sadb_msg_type <= SADB_RESERVED || hdr->sadb_msg_type > SADB_MAX)) { hdr = NULL; *errp = -EINVAL; } else if (hdr->sadb_msg_len != (skb->len / sizeof(uint64_t)) || hdr->sadb_msg_len < (sizeof(struct sadb_msg) / sizeof(uint64_t))) { hdr = NULL; *errp = -EMSGSIZE; } else { *errp = 0; } } return hdr; } static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->aalgos) * 8) return 0; return (t->aalgos >> id) & 1; } static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->ealgos) * 8) return 0; return (t->ealgos >> id) & 1; } static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); } static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = skb_put_zero(skb, sizeof(struct sadb_comb)); 
p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { struct sadb_comb *c; const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = skb_put(skb, sizeof(struct sadb_comb)); memset(c, 0, sizeof(*c)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_encrypt = ealg->desc.sadb_alg_id; c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits; c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int hard; int hsc; hard = c->data.hard; if (hard) hsc = 2; else hsc = 1; out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; out_hdr->sadb_msg_type = SADB_EXPIRE; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? 
xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: return key_notify_sa(x, c); case XFRM_MSG_FLUSHSA: return key_notify_sa_flush(c); case XFRM_MSG_NEWAE: /* not yet supported */ break; default: pr_err("pfkey: Unknown SA event %d\n", c->event); break; } return 0; } static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); case XFRM_MSG_DELPOLICY: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: if (c->data.type != XFRM_POLICY_TYPE_MAIN) break; return key_notify_policy_flush(c); default: pr_err("pfkey: Unknown policy event %d\n", c->event); break; } return 0; } static u32 get_acqseq(void) { u32 res; static atomic_t acqseq; do { res = atomic_inc_return(&acqseq); } while (!res); return res; } static bool pfkey_is_alive(const struct km_event *c) { struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id); struct sock *sk; bool is_alive = false; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { if (pfkey_sk(sk)->registered) { is_alive = true; break; } } rcu_read_unlock(); return is_alive; } static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; size = sizeof(struct sadb_msg) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_ACQUIRE; hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq = get_acqseq(); hdr->sadb_msg_pid = 0; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); 
if (!addr->sadb_address_prefixlen) BUG(); pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ alg_size = 0; if (x->id.proto == IPPROTO_AH) alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) alg_size = dump_esp_combs(skb, t); hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #endif default: *dir = -EINVAL; return NULL; } *dir = -EINVAL; if (len < sizeof(struct sadb_x_policy) || pol->sadb_x_policy_len*8 > len || pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS || (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; /* security context too */ if (len >= (pol->sadb_x_policy_len*8 + sizeof(struct sadb_x_sec_ctx))) { char *p = (char *)pol; struct xfrm_user_sec_ctx *uctx; p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) goto out; } *dir = pol->sadb_x_policy_dir-1; return xp; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? 
SADB_SATYPE_ESP : 0); struct xfrm_encap_tmpl *natt = NULL; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; if (!satype) return -EINVAL; if (!x->encap) return -EINVAL; natt = x->encap; /* Build an SADB_X_NAT_T_NEW_MAPPING message: * * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) | * ADDRESS_DST (new addr) | NAT_T_DPORT (new port) */ size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + (sizeof(struct sadb_x_nat_t_port) * 2); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING; hdr->sadb_msg_satype = satype; hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = 0; sa->sadb_sa_state = 0; sa->sadb_sa_auth = 0; sa->sadb_sa_encrypt = 0; sa->sadb_sa_flags = 0; /* ADDRESS_SRC (old addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* ADDRESS_DST (new addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(ipaddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, const struct xfrm_selector *sel) { struct sadb_address *addr; addr = skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; addr->sadb_address_proto = sel->proto; addr->sadb_address_reserved = 0; switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; pfkey_sockaddr_fill(&sel->saddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; pfkey_sockaddr_fill(&sel->daddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; default: return -EINVAL; } return 0; } 
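/*
 * Illustration (a hedged sketch added for clarity, not part of the
 * original file): every PF_KEY extension length, including the
 * sadb_address_len stored by set_sadb_address() above, is counted in
 * 64-bit words.  For an IPv4 selector, sizeof(struct sadb_address) is
 * 8 bytes and pfkey_sockaddr_size(AF_INET) yields a 16-byte trailing
 * sockaddr_in, so the stored length works out to (8 + 16) / 8 = 3.
 * The helper name below is hypothetical.
 */
static inline unsigned int pfkey_example_addr_ext_len(int sasize)
{
	/* Length of one ADDRESS extension, in 64-bit units. */
	return (sizeof(struct sadb_address) + sasize) / sizeof(uint64_t);
}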
static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; int family = k->family; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = (sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(family)); kma = skb_put_zero(skb, size_req); kma->sadb_x_kmaddress_len = size_req / 8; kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; kma->sadb_x_kmaddress_reserved = k->reserved; sa = (u8 *)(kma + 1); if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) return -EINVAL; return 0; } static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(family); rq = skb_put_zero(skb, size_req); rq->sadb_x_ipsecrequest_len = size_req; rq->sadb_x_ipsecrequest_proto = proto; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; sa = (u8 *) (rq + 1); if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; return 0; } #endif #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { int i; int sasize_sel; int size = 0; int size_pol = 0; struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; if (k != NULL) { /* addresses for KM */ size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(k->family)); } /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) return -EINVAL; size += (sizeof(struct sadb_address) + sasize_sel) * 2; /* policy info */ size_pol += sizeof(struct sadb_x_policy); /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->old_family); /* new locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->new_family); } size += sizeof(struct sadb_msg) + size_pol; /* alloc buffer */ skb = alloc_skb(size, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_MIGRATE; hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); hdr->sadb_msg_len = size / 8; hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; /* Addresses to be used by KM for negotiation, if ext is available */ if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) goto err; /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); /* selector dst */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); /* policy information */ pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = size_pol / 8; pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; 
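	/*
	 * PF_KEY policy directions are 1-based (IPSEC_DIR_INBOUND,
	 * IPSEC_DIR_OUTBOUND) while the kernel's XFRM_POLICY_IN/OUT/FWD
	 * values are 0-based, so the direction is incremented by one when
	 * exported here and decremented again on input, as done in
	 * pfkey_compile_policy() above (*dir = pol->sadb_x_policy_dir - 1).
	 */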
pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old ipsecrequest */ int mode = pfkey_mode_from_xfrm(mp->mode); if (mode < 0) goto err; if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->old_family, &mp->old_saddr, &mp->old_daddr) < 0) goto err; /* new ipsecrequest */ if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->new_family, &mp->new_saddr, &mp->new_daddr) < 0) goto err; } /* broadcast migrate message to sockets */ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; err: kfree_skb(skb); return -EINVAL; } #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } #endif static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) goto out; err = -EMSGSIZE; if ((unsigned int)len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; skb = alloc_skb(len, GFP_KERNEL); if (skb == NULL) goto out; err = -EFAULT; if (memcpy_from_msg(skb_put(skb,len), msg, len)) goto out; hdr = pfkey_get_base_msg(skb, &err); if (!hdr) goto out; mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) err = 0; kfree_skb(skb); return err ? : len; } static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *skb; int copied, err; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; if (pfk->dump.dump != NULL && 3 * atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) pfkey_do_dump(pfk); out_free: skb_free_datagram(sk, skb); out: return err; } static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ .bind = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, /* Now the operations that really occur. 
*/ .release = pfkey_release, .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, }; #ifdef CONFIG_PROC_FS static int pfkey_seq_show(struct seq_file *f, void *v) { struct sock *s = sk_entry(v); if (v == SEQ_START_TOKEN) seq_printf(f ,"sk RefCnt Rmem Wmem User Inode\n"); else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); rcu_read_lock(); return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) __releases(rcu) { rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { .start = pfkey_seq_start, .next = pfkey_seq_next, .stop = pfkey_seq_stop, .show = pfkey_seq_show, }; static int __net_init pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops, sizeof(struct seq_net_private)); if (e == NULL) return -ENOMEM; return 0; } static void __net_exit pfkey_exit_proc(struct net *net) { remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) { return 0; } static inline void pfkey_exit_proc(struct net *net) { } #endif static struct xfrm_mgr pfkeyv2_mgr = { .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, .migrate = pfkey_send_migrate, .is_alive = pfkey_is_alive, }; static int __net_init pfkey_net_init(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); int rv; INIT_HLIST_HEAD(&net_pfkey->table); atomic_set(&net_pfkey->socks_nr, 0); rv = pfkey_init_proc(net); return rv; } static void __net_exit pfkey_net_exit(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { .init = pfkey_net_init, .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), }; static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } static int __init ipsec_pfkey_init(void) { int err = proto_register(&key_proto, 0); if (err != 0) goto out; err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_pernet; xfrm_register_km(&pfkeyv2_mgr); out: return err; out_unregister_pernet: unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; } module_init(ipsec_pfkey_init); module_exit(ipsec_pfkey_exit); MODULE_DESCRIPTION("PF_KEY socket helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_KEY);
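/*
 * A minimal userspace sketch (added for illustration, not part of the
 * kernel sources above) of how a PF_KEY consumer can exercise the socket
 * family registered by ipsec_pfkey_init().  It assumes only the standard
 * RFC 2367 / <linux/pfkeyv2.h> ABI: a bare sadb_msg header is a complete
 * SADB_DUMP request, with the length counted in 64-bit words.  Opening a
 * PF_KEY socket requires CAP_NET_ADMIN; a real client would go on to
 * read() the dumped SA entries broadcast back on the same socket.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/pfkeyv2.h>

int main(void)
{
	struct sadb_msg msg;
	int s = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);

	if (s < 0) {
		perror("socket(PF_KEY)");
		return 1;
	}

	memset(&msg, 0, sizeof(msg));
	msg.sadb_msg_version = PF_KEY_V2;
	msg.sadb_msg_type = SADB_DUMP;
	msg.sadb_msg_satype = SADB_SATYPE_UNSPEC;
	msg.sadb_msg_len = sizeof(msg) / 8;	/* two 64-bit words */
	msg.sadb_msg_seq = 1;
	msg.sadb_msg_pid = getpid();

	if (write(s, &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
		perror("write");

	close(s);
	return 0;
}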
// SPDX-License-Identifier: GPL-2.0 /* * n_gsm.c GSM 0710 tty multiplexor * Copyright (c) 2009/10 Intel Corporation * Copyright (c) 2022/23 Siemens Mobility GmbH * * * THIS IS A DEVELOPMENT SNAPSHOT IT IS NOT A FINAL RELEASE * * * Outgoing path: * tty -> DLCI fifo -> scheduler -> GSM MUX data queue ---o-> ldisc * control message -> GSM MUX control queue --´ * * Incoming path: * ldisc -> gsm_queue() -o--> tty * `-> gsm_control_response() * * TO DO: * Mostly done: ioctls for setting modes/timing * Partly done: hooks so you can pull off frames to non tty devs * Restart DLCI 0 when it closes ? * Improve the tx engine * Resolve tx side locking by adding a queue_head and routing * all control traffic via it * General tidy/document * Review the locking/move to refcounts more (mux now moved to an * alloc/free model ready) * Use newest tty open/close port helpers and install hooks * What to do about power functions ? * Termios setting and negotiation * Do we need a 'which mux are you' ioctl to correlate mux and tty sets * */ #include <linux/types.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/bitfield.h> #include <linux/ctype.h> #include <linux/mm.h> #include <linux/math.h> #include <linux/nospec.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/bitops.h> #include <linux/file.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/timer.h> #include <linux/tty_flip.h> #include <linux/tty_driver.h> #include <linux/serial.h> #include <linux/kfifo.h> #include <linux/skbuff.h> #include <net/arp.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/gsmmux.h> #include "tty.h" static int debug; module_param(debug, int, 0600); /* Module debug bits */ #define DBG_DUMP BIT(0) /* Data transmission dump. */ #define DBG_CD_ON BIT(1) /* Always assume CD line on. */ #define DBG_DATA BIT(2) /* Data transmission details. */ #define DBG_ERRORS BIT(3) /* Details for fail conditions. */ #define DBG_TTY BIT(4) /* Transmission statistics for DLCI TTYs. */ #define DBG_PAYLOAD BIT(5) /* Limits DBG_DUMP to payload frames. */ /* Defaults: these are from the specification */ #define T1 10 /* 100mS */ #define T2 34 /* 333mS */ #define T3 10 /* 10s */ #define N2 3 /* Retry 3 times */ #define K 2 /* outstanding I frames */ #define MAX_T3 255 /* In seconds.
*/ #define MAX_WINDOW_SIZE 7 /* Limit of K in error recovery mode. */ /* Use long timers for testing at low speed with debug on */ #ifdef DEBUG_TIMING #define T1 100 #define T2 200 #endif /* * Semi-arbitrary buffer size limits. 0710 is normally run with 32-64 byte * limits so this is plenty */ #define MAX_MRU 1500 #define MAX_MTU 1500 #define MIN_MTU (PROT_OVERHEAD + 1) /* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */ #define PROT_OVERHEAD 7 #define GSM_NET_TX_TIMEOUT (HZ*10) /* * struct gsm_mux_net - network interface * * Created when net interface is initialized. */ struct gsm_mux_net { struct kref ref; struct gsm_dlci *dlci; }; /* * Each block of data we have queued to go out is in the form of * a gsm_msg which holds everything we need in a link layer independent * format */ struct gsm_msg { struct list_head list; u8 addr; /* DLCI address + flags */ u8 ctrl; /* Control byte + flags */ unsigned int len; /* Length of data block (can be zero) */ u8 *data; /* Points into buffer but not at the start */ u8 buffer[]; }; enum gsm_dlci_state { DLCI_CLOSED, DLCI_WAITING_CONFIG, /* Waiting for DLCI configuration from user */ DLCI_CONFIGURE, /* Sending PN (for adaption > 1) */ DLCI_OPENING, /* Sending SABM not seen UA */ DLCI_OPEN, /* SABM/UA complete */ DLCI_CLOSING, /* Sending DISC not seen UA/DM */ }; enum gsm_dlci_mode { DLCI_MODE_ABM, /* Normal Asynchronous Balanced Mode */ DLCI_MODE_ADM, /* Asynchronous Disconnected Mode */ }; /* * Each active data link has a gsm_dlci structure associated which ties * the link layer to an optional tty (if the tty side is open). To avoid * complexity right now these are only ever freed up when the mux is * shut down. * * At the moment we don't free DLCI objects until the mux is torn down * this avoid object life time issues but might be worth review later. */ struct gsm_dlci { struct gsm_mux *gsm; int addr; enum gsm_dlci_state state; struct mutex mutex; /* Link layer */ enum gsm_dlci_mode mode; spinlock_t lock; /* Protects the internal state */ struct timer_list t1; /* Retransmit timer for SABM and UA */ int retries; /* Uplink tty if active */ struct tty_port port; /* The tty bound to this DLCI if there is one */ #define TX_SIZE 4096 /* Must be power of 2. */ struct kfifo fifo; /* Queue fifo for the DLCI */ int adaption; /* Adaption layer in use */ int prev_adaption; u32 modem_rx; /* Our incoming virtual modem lines */ u32 modem_tx; /* Our outgoing modem lines */ unsigned int mtu; bool dead; /* Refuse re-open */ /* Configuration */ u8 prio; /* Priority */ u8 ftype; /* Frame type */ u8 k; /* Window size */ /* Flow control */ bool throttled; /* Private copy of throttle state */ bool constipated; /* Throttle status for outgoing */ /* Packetised I/O */ struct sk_buff *skb; /* Frame being sent */ struct sk_buff_head skb_list; /* Queued frames */ /* Data handling callback */ void (*data)(struct gsm_dlci *dlci, const u8 *data, int len); void (*prev_data)(struct gsm_dlci *dlci, const u8 *data, int len); struct net_device *net; /* network interface, if created */ }; /* * Parameter bits used for parameter negotiation according to 3GPP 27.010 * chapter 5.4.6.3.1. 
*/ struct gsm_dlci_param_bits { u8 d_bits; u8 i_cl_bits; u8 p_bits; u8 t_bits; __le16 n_bits; u8 na_bits; u8 k_bits; }; static_assert(sizeof(struct gsm_dlci_param_bits) == 8); #define PN_D_FIELD_DLCI GENMASK(5, 0) #define PN_I_CL_FIELD_FTYPE GENMASK(3, 0) #define PN_I_CL_FIELD_ADAPTION GENMASK(7, 4) #define PN_P_FIELD_PRIO GENMASK(5, 0) #define PN_T_FIELD_T1 GENMASK(7, 0) #define PN_N_FIELD_N1 GENMASK(15, 0) #define PN_NA_FIELD_N2 GENMASK(7, 0) #define PN_K_FIELD_K GENMASK(2, 0) /* Total number of supported devices */ #define GSM_TTY_MINORS 256 /* DLCI 0, 62/63 are special or reserved see gsmtty_open */ #define NUM_DLCI 64 /* * DLCI 0 is used to pass control blocks out of band of the data * flow (and with a higher link priority). One command can be outstanding * at a time and we use this structure to manage them. They are created * and destroyed by the user context, and updated by the receive paths * and timers */ struct gsm_control { u8 cmd; /* Command we are issuing */ u8 *data; /* Data for the command in case we retransmit */ int len; /* Length of block for retransmission */ int done; /* Done flag */ int error; /* Error if any */ }; enum gsm_encoding { GSM_BASIC_OPT, GSM_ADV_OPT, }; enum gsm_mux_state { GSM_SEARCH, GSM0_ADDRESS, GSM0_CONTROL, GSM0_LEN0, GSM0_LEN1, GSM0_DATA, GSM0_FCS, GSM0_SSOF, GSM1_START, GSM1_ADDRESS, GSM1_CONTROL, GSM1_DATA, GSM1_OVERRUN, }; /* * Each GSM mux we have is represented by this structure. If we are * operating as an ldisc then we use this structure as our ldisc * state. We need to sort out lifetimes and locking with respect * to the gsm mux array. For now we don't free DLCI objects that * have been instantiated until the mux itself is terminated. * * To consider further: tty open versus mux shutdown. */ struct gsm_mux { struct tty_struct *tty; /* The tty our ldisc is bound to */ spinlock_t lock; struct mutex mutex; unsigned int num; struct kref ref; /* Events on the GSM channel */ wait_queue_head_t event; /* ldisc send work */ struct work_struct tx_work; /* Bits for GSM mode decoding */ /* Framing Layer */ u8 *buf; enum gsm_mux_state state; unsigned int len; unsigned int address; unsigned int count; bool escape; enum gsm_encoding encoding; u8 control; u8 fcs; u8 *txframe; /* TX framing buffer */ /* Method for the receiver side */ void (*receive)(struct gsm_mux *gsm, u8 ch); /* Link Layer */ unsigned int mru; unsigned int mtu; int initiator; /* Did we initiate connection */ bool dead; /* Has the mux been shut down */ struct gsm_dlci *dlci[NUM_DLCI]; int old_c_iflag; /* termios c_iflag value before attach */ bool constipated; /* Asked by remote to shut up */ bool has_devices; /* Devices were registered */ spinlock_t tx_lock; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 struct list_head tx_ctrl_list; /* Pending control packets */ struct list_head tx_data_list; /* Pending data packets */ /* Control messages */ struct timer_list kick_timer; /* Kick TX queuing on timeout */ struct timer_list t2_timer; /* Retransmit timer for commands */ int cretries; /* Command retry counter */ struct gsm_control *pending_cmd;/* Our current pending command */ spinlock_t control_lock; /* Protects the pending command */ /* Keep-alive */ struct timer_list ka_timer; /* Keep-alive response timer */ u8 ka_num; /* Keep-alive match pattern */ signed int ka_retries; /* Keep-alive retry counter, -1 if not yet initialized */ /* Configuration */ int adaption; /* 1 or 2 supported */ u8 ftype; /* UI or UIH */ int t1, t2; /* Timers in 
1/100th of a sec */ unsigned int t3; /* Power wake-up timer in seconds. */ int n2; /* Retry count */ u8 k; /* Window size */ bool wait_config; /* Wait for configuration by ioctl before DLCI open */ u32 keep_alive; /* Control channel keep-alive in 10ms */ /* Statistics (not currently exposed) */ unsigned long bad_fcs; unsigned long malformed; unsigned long io_error; unsigned long open_error; unsigned long bad_size; unsigned long unsupported; }; /* * Mux objects - needed so that we can translate a tty index into the * relevant mux and DLCI. */ #define MAX_MUX 4 /* 256 minors */ static struct gsm_mux *gsm_mux[MAX_MUX]; /* GSM muxes */ static DEFINE_SPINLOCK(gsm_mux_lock); static struct tty_driver *gsm_tty_driver; /* * This section of the driver logic implements the GSM encodings * both the basic and the 'advanced'. Reliable transport is not * supported. */ #define CR 0x02 #define EA 0x01 #define PF 0x10 /* I is special: the rest are ..*/ #define RR 0x01 #define UI 0x03 #define RNR 0x05 #define REJ 0x09 #define DM 0x0F #define SABM 0x2F #define DISC 0x43 #define UA 0x63 #define UIH 0xEF /* Channel commands */ #define CMD_NSC 0x09 #define CMD_TEST 0x11 #define CMD_PSC 0x21 #define CMD_RLS 0x29 #define CMD_FCOFF 0x31 #define CMD_PN 0x41 #define CMD_RPN 0x49 #define CMD_FCON 0x51 #define CMD_CLD 0x61 #define CMD_SNC 0x69 #define CMD_MSC 0x71 /* Virtual modem bits */ #define MDM_FC 0x01 #define MDM_RTC 0x02 #define MDM_RTR 0x04 #define MDM_IC 0x20 #define MDM_DV 0x40 #define GSM0_SOF 0xF9 #define GSM1_SOF 0x7E #define GSM1_ESCAPE 0x7D #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 #define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; /* * CRC table for GSM 0710 */ static const u8 gsm_fcs8[256] = { 0x00, 0x91, 0xE3, 0x72, 0x07, 0x96, 0xE4, 0x75, 0x0E, 0x9F, 0xED, 0x7C, 0x09, 0x98, 0xEA, 0x7B, 0x1C, 0x8D, 0xFF, 0x6E, 0x1B, 0x8A, 0xF8, 0x69, 0x12, 0x83, 0xF1, 0x60, 0x15, 0x84, 0xF6, 0x67, 0x38, 0xA9, 0xDB, 0x4A, 0x3F, 0xAE, 0xDC, 0x4D, 0x36, 0xA7, 0xD5, 0x44, 0x31, 0xA0, 0xD2, 0x43, 0x24, 0xB5, 0xC7, 0x56, 0x23, 0xB2, 0xC0, 0x51, 0x2A, 0xBB, 0xC9, 0x58, 0x2D, 0xBC, 0xCE, 0x5F, 0x70, 0xE1, 0x93, 0x02, 0x77, 0xE6, 0x94, 0x05, 0x7E, 0xEF, 0x9D, 0x0C, 0x79, 0xE8, 0x9A, 0x0B, 0x6C, 0xFD, 0x8F, 0x1E, 0x6B, 0xFA, 0x88, 0x19, 0x62, 0xF3, 0x81, 0x10, 0x65, 0xF4, 0x86, 0x17, 0x48, 0xD9, 0xAB, 0x3A, 0x4F, 0xDE, 0xAC, 0x3D, 0x46, 0xD7, 0xA5, 0x34, 0x41, 0xD0, 0xA2, 0x33, 0x54, 0xC5, 0xB7, 0x26, 0x53, 0xC2, 0xB0, 0x21, 0x5A, 0xCB, 0xB9, 0x28, 0x5D, 0xCC, 0xBE, 0x2F, 0xE0, 0x71, 0x03, 0x92, 0xE7, 0x76, 0x04, 0x95, 0xEE, 0x7F, 0x0D, 0x9C, 0xE9, 0x78, 0x0A, 0x9B, 0xFC, 0x6D, 0x1F, 0x8E, 0xFB, 0x6A, 0x18, 0x89, 0xF2, 0x63, 0x11, 0x80, 0xF5, 0x64, 0x16, 0x87, 0xD8, 0x49, 0x3B, 0xAA, 0xDF, 0x4E, 0x3C, 0xAD, 0xD6, 0x47, 0x35, 0xA4, 0xD1, 0x40, 0x32, 0xA3, 0xC4, 0x55, 0x27, 0xB6, 0xC3, 0x52, 0x20, 0xB1, 0xCA, 0x5B, 0x29, 0xB8, 0xCD, 0x5C, 0x2E, 0xBF, 0x90, 0x01, 0x73, 0xE2, 0x97, 0x06, 0x74, 0xE5, 0x9E, 0x0F, 0x7D, 0xEC, 0x99, 0x08, 0x7A, 0xEB, 0x8C, 0x1D, 0x6F, 0xFE, 0x8B, 0x1A, 0x68, 0xF9, 0x82, 0x13, 0x61, 0xF0, 0x85, 0x14, 0x66, 0xF7, 0xA8, 0x39, 0x4B, 0xDA, 0xAF, 0x3E, 0x4C, 0xDD, 0xA6, 0x37, 0x45, 0xD4, 0xA1, 0x30, 0x42, 0xD3, 0xB4, 0x25, 0x57, 0xC6, 0xB3, 0x22, 0x50, 0xC1, 0xBA, 0x2B, 0x59, 0xC8, 0xBD, 0x2C, 0x5E, 0xCF }; #define INIT_FCS 0xFF #define GOOD_FCS 0xCF static void gsm_dlci_close(struct gsm_dlci *dlci); static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); static struct gsm_msg 
*gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, u8 ctrl); static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg); static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr); static void gsmld_write_trigger(struct gsm_mux *gsm); static void gsmld_write_task(struct work_struct *work); /** * gsm_fcs_add - update FCS * @fcs: Current FCS * @c: Next data * * Update the FCS to include c. Uses the algorithm in the specification * notes. */ static inline u8 gsm_fcs_add(u8 fcs, u8 c) { return gsm_fcs8[fcs ^ c]; } /** * gsm_fcs_add_block - update FCS for a block * @fcs: Current FCS * @c: buffer of data * @len: length of buffer * * Update the FCS to include c. Uses the algorithm in the specification * notes. */ static inline u8 gsm_fcs_add_block(u8 fcs, u8 *c, int len) { while (len--) fcs = gsm_fcs8[fcs ^ *c++]; return fcs; } /** * gsm_read_ea - read a byte into an EA * @val: variable holding value * @c: byte going into the EA * * Processes one byte of an EA. Updates the passed variable * and returns 1 if the EA is now completely read */ static int gsm_read_ea(unsigned int *val, u8 c) { /* Add the next 7 bits into the value */ *val <<= 7; *val |= c >> 1; /* Was this the last byte of the EA 1 = yes*/ return c & EA; } /** * gsm_read_ea_val - read a value until EA * @val: variable holding value * @data: buffer of data * @dlen: length of data * * Processes an EA value. Updates the passed variable and * returns the processed data length. */ static unsigned int gsm_read_ea_val(unsigned int *val, const u8 *data, int dlen) { unsigned int len = 0; for (; dlen > 0; dlen--) { len++; if (gsm_read_ea(val, *data++)) break; } return len; } /** * gsm_encode_modem - encode modem data bits * @dlci: DLCI to encode from * * Returns the correct GSM encoded modem status bits (6 bit field) for * the current status of the DLCI and attached tty object */ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) { u8 modembits = 0; /* FC is true flow control not modem bits */ if (dlci->throttled) modembits |= MDM_FC; if (dlci->modem_tx & TIOCM_DTR) modembits |= MDM_RTC; if (dlci->modem_tx & TIOCM_RTS) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; /* special mappings for passive side to operate as UE */ if (dlci->modem_tx & TIOCM_OUT1) modembits |= MDM_IC; if (dlci->modem_tx & TIOCM_OUT2) modembits |= MDM_DV; return modembits; } static void gsm_hex_dump_bytes(const char *fname, const u8 *data, unsigned long len) { char *prefix; if (!fname) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, data, len, true); return; } prefix = kasprintf(GFP_ATOMIC, "%s: ", fname); if (!prefix) return; print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len, true); kfree(prefix); } /** * gsm_encode_params - encode DLCI parameters * @dlci: DLCI to encode from * @params: buffer to fill with the encoded parameters * * Encodes the parameters according to GSM 07.10 section 5.4.6.3.1 * table 3. */ static int gsm_encode_params(const struct gsm_dlci *dlci, struct gsm_dlci_param_bits *params) { const struct gsm_mux *gsm = dlci->gsm; unsigned int i, cl; switch (dlci->ftype) { case UIH: i = 0; /* UIH */ break; case UI: i = 1; /* UI */ break; default: pr_debug("unsupported frame type %d\n", dlci->ftype); return -EINVAL; } switch (dlci->adaption) { case 1: /* Unstructured */ cl = 0; /* convergence layer type 1 */ break; case 2: /* Unstructured with modem bits. 
*/ cl = 1; /* convergence layer type 2 */ break; default: pr_debug("unsupported adaption %d\n", dlci->adaption); return -EINVAL; } params->d_bits = FIELD_PREP(PN_D_FIELD_DLCI, dlci->addr); /* UIH, convergence layer type 1 */ params->i_cl_bits = FIELD_PREP(PN_I_CL_FIELD_FTYPE, i) | FIELD_PREP(PN_I_CL_FIELD_ADAPTION, cl); params->p_bits = FIELD_PREP(PN_P_FIELD_PRIO, dlci->prio); params->t_bits = FIELD_PREP(PN_T_FIELD_T1, gsm->t1); params->n_bits = cpu_to_le16(FIELD_PREP(PN_N_FIELD_N1, dlci->mtu)); params->na_bits = FIELD_PREP(PN_NA_FIELD_N2, gsm->n2); params->k_bits = FIELD_PREP(PN_K_FIELD_K, dlci->k); return 0; } /** * gsm_register_devices - register all tty devices for a given mux index * * @driver: the tty driver that describes the tty devices * @index: the mux number is used to calculate the minor numbers of the * ttys for this mux and may differ from the position in the * mux array. */ static int gsm_register_devices(struct tty_driver *driver, unsigned int index) { struct device *dev; int i; unsigned int base; if (!driver || index >= MAX_MUX) return -EINVAL; base = index * NUM_DLCI; /* first minor for this index */ for (i = 1; i < NUM_DLCI; i++) { /* Don't register device 0 - this is the control channel * and not a usable tty interface */ dev = tty_register_device(gsm_tty_driver, base + i, NULL); if (IS_ERR(dev)) { if (debug & DBG_ERRORS) pr_info("%s failed to register device minor %u", __func__, base + i); for (i--; i >= 1; i--) tty_unregister_device(gsm_tty_driver, base + i); return PTR_ERR(dev); } } return 0; } /** * gsm_unregister_devices - unregister all tty devices for a given mux index * * @driver: the tty driver that describes the tty devices * @index: the mux number is used to calculate the minor numbers of the * ttys for this mux and may differ from the position in the * mux array. */ static void gsm_unregister_devices(struct tty_driver *driver, unsigned int index) { int i; unsigned int base; if (!driver || index >= MAX_MUX) return; base = index * NUM_DLCI; /* first minor for this index */ for (i = 1; i < NUM_DLCI; i++) { /* Don't unregister device 0 - this is the control * channel and not a usable tty interface */ tty_unregister_device(gsm_tty_driver, base + i); } } /** * gsm_print_packet - display a frame for debug * @hdr: header to print before decode * @addr: address EA from the frame * @cr: C/R bit seen as initiator * @control: control including PF bit * @data: following data bytes * @dlen: length of data * * Displays a packet in human readable format for debugging purposes. The * style is based on amateur radio LAP-B dump display. 
*/ static void gsm_print_packet(const char *hdr, int addr, int cr, u8 control, const u8 *data, int dlen) { if (!(debug & DBG_DUMP)) return; /* Only show user payload frames if debug & DBG_PAYLOAD */ if (!(debug & DBG_PAYLOAD) && addr != 0) if ((control & ~PF) == UI || (control & ~PF) == UIH) return; pr_info("%s %d) %c: ", hdr, addr, "RC"[cr]); switch (control & ~PF) { case SABM: pr_cont("SABM"); break; case UA: pr_cont("UA"); break; case DISC: pr_cont("DISC"); break; case DM: pr_cont("DM"); break; case UI: pr_cont("UI"); break; case UIH: pr_cont("UIH"); break; default: if (!(control & 0x01)) { pr_cont("I N(S)%d N(R)%d", (control & 0x0E) >> 1, (control & 0xE0) >> 5); } else switch (control & 0x0F) { case RR: pr_cont("RR(%d)", (control & 0xE0) >> 5); break; case RNR: pr_cont("RNR(%d)", (control & 0xE0) >> 5); break; case REJ: pr_cont("REJ(%d)", (control & 0xE0) >> 5); break; default: pr_cont("[%02X]", control); } } if (control & PF) pr_cont("(P)"); else pr_cont("(F)"); gsm_hex_dump_bytes(NULL, data, dlen); } /* * Link level transmission side */ /** * gsm_stuff_frame - bytestuff a packet * @input: input buffer * @output: output buffer * @len: length of input * * Expand a buffer by bytestuffing it. The worst case size change * is doubling and the caller is responsible for handing out * suitable sized buffers. */ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) { int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE || (*input & ISO_IEC_646_MASK) == XON || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; } else *output++ = *input++; olen++; } return olen; } /** * gsm_send - send a control frame * @gsm: our GSM mux * @addr: address for control frame * @cr: command/response bit seen as initiator * @control: control byte including PF bit * * Format up and transmit a control frame. These should be transmitted * ahead of data when they are needed. */ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) { struct gsm_msg *msg; u8 *dp; int ocr; unsigned long flags; msg = gsm_data_alloc(gsm, addr, 0, control); if (!msg) return -ENOMEM; /* toggle C/R coding if not initiator */ ocr = cr ^ (gsm->initiator ? 0 : 1); msg->data -= 3; dp = msg->data; *dp++ = (addr << 2) | (ocr << 1) | EA; *dp++ = control; if (gsm->encoding == GSM_BASIC_OPT) *dp++ = EA; /* Length of data = 0 */ *dp = 0xFF - gsm_fcs_add_block(INIT_FCS, msg->data, dp - msg->data); msg->len = (dp - msg->data) + 1; gsm_print_packet("Q->", addr, cr, control, NULL, 0); spin_lock_irqsave(&gsm->tx_lock, flags); list_add_tail(&msg->list, &gsm->tx_ctrl_list); gsm->tx_bytes += msg->len; spin_unlock_irqrestore(&gsm->tx_lock, flags); gsmld_write_trigger(gsm); return 0; } /** * gsm_dlci_clear_queues - remove outstanding data for a DLCI * @gsm: mux * @dlci: clear for this DLCI * * Clears the data queues for a given DLCI. 
*/ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg, *nmsg; int addr = dlci->addr; unsigned long flags; /* Clear DLCI write fifo first */ spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); spin_unlock_irqrestore(&dlci->lock, flags); /* Clear data packets in MUX write queue */ spin_lock_irqsave(&gsm->tx_lock, flags); list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { if (msg->addr != addr) continue; gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); } spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** * gsm_response - send a control response * @gsm: our GSM mux * @addr: address for control frame * @control: control byte including PF bit * * Format up and transmit a link level response frame. */ static inline void gsm_response(struct gsm_mux *gsm, int addr, int control) { gsm_send(gsm, addr, 0, control); } /** * gsm_command - send a control command * @gsm: our GSM mux * @addr: address for control frame * @control: control byte including PF bit * * Format up and transmit a link level command frame. */ static inline void gsm_command(struct gsm_mux *gsm, int addr, int control) { gsm_send(gsm, addr, 1, control); } /* Data transmission */ #define HDR_LEN 6 /* ADDR CTRL [LEN.2] DATA FCS */ /** * gsm_data_alloc - allocate data frame * @gsm: GSM mux * @addr: DLCI address * @len: length excluding header and FCS * @ctrl: control byte * * Allocate a new data buffer for sending frames with data. Space is left * at the front for header bytes but that is treated as an implementation * detail and not for the high level code to use */ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, u8 ctrl) { struct gsm_msg *m = kmalloc(sizeof(struct gsm_msg) + len + HDR_LEN, GFP_ATOMIC); if (m == NULL) return NULL; m->data = m->buffer + HDR_LEN - 1; /* Allow for FCS */ m->len = len; m->addr = addr; m->ctrl = ctrl; INIT_LIST_HEAD(&m->list); return m; } /** * gsm_send_packet - sends a single packet * @gsm: GSM Mux * @msg: packet to send * * The given packet is encoded and sent out. No memory is freed. * The caller must hold the gsm tx lock. */ static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg) { int len, ret; if (gsm->encoding == GSM_BASIC_OPT) { gsm->txframe[0] = GSM0_SOF; memcpy(gsm->txframe + 1, msg->data, msg->len); gsm->txframe[msg->len + 1] = GSM0_SOF; len = msg->len + 2; } else { gsm->txframe[0] = GSM1_SOF; len = gsm_stuff_frame(msg->data, gsm->txframe + 1, msg->len); gsm->txframe[len + 1] = GSM1_SOF; len += 2; } if (debug & DBG_DATA) gsm_hex_dump_bytes(__func__, gsm->txframe, len); gsm_print_packet("-->", msg->addr, gsm->initiator, msg->ctrl, msg->data, msg->len); ret = gsmld_output(gsm, gsm->txframe, len); if (ret <= 0) return ret; /* FIXME: Can eliminate one SOF in many more cases */ gsm->tx_bytes -= msg->len; return 0; } /** * gsm_is_flow_ctrl_msg - checks if flow control message * @msg: message to check * * Returns true if the given message is a flow control command of the * control channel. False is returned in any other case. 
*/ static bool gsm_is_flow_ctrl_msg(struct gsm_msg *msg) { unsigned int cmd; if (msg->addr > 0) return false; switch (msg->ctrl & ~PF) { case UI: case UIH: cmd = 0; if (gsm_read_ea_val(&cmd, msg->data + 2, msg->len - 2) < 1) break; switch (cmd & ~PF) { case CMD_FCOFF: case CMD_FCON: return true; } break; } return false; } /** * gsm_data_kick - poke the queue * @gsm: GSM Mux * * The tty device has called us to indicate that room has appeared in * the transmit queue. Ram more data into the pipe if we have any. * If we have been flow-stopped by a CMD_FCOFF, then we can only * send messages on DLCI0 until CMD_FCON. The caller must hold * the gsm tx lock. */ static int gsm_data_kick(struct gsm_mux *gsm) { struct gsm_msg *msg, *nmsg; struct gsm_dlci *dlci; int ret; clear_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags); /* Serialize control messages and control channel messages first */ list_for_each_entry_safe(msg, nmsg, &gsm->tx_ctrl_list, list) { if (gsm->constipated && !gsm_is_flow_ctrl_msg(msg)) continue; ret = gsm_send_packet(gsm, msg); switch (ret) { case -ENOSPC: return -ENOSPC; case -ENODEV: /* ldisc not open */ gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; default: if (ret >= 0) { list_del(&msg->list); kfree(msg); } break; } } if (gsm->constipated) return -EAGAIN; /* Serialize other channels */ if (list_empty(&gsm->tx_data_list)) return 0; list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { dlci = gsm->dlci[msg->addr]; /* Send only messages for DLCIs with valid state */ if (dlci->state != DLCI_OPEN) { gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; } ret = gsm_send_packet(gsm, msg); switch (ret) { case -ENOSPC: return -ENOSPC; case -ENODEV: /* ldisc not open */ gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; default: if (ret >= 0) { list_del(&msg->list); kfree(msg); } break; } } return 1; } /** * __gsm_data_queue - queue a UI or UIH frame * @dlci: DLCI sending the data * @msg: message queued * * Add data to the transmit queue and try and get stuff moving * out of the mux tty if not already doing so. The Caller must hold * the gsm tx lock. 
*/ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { struct gsm_mux *gsm = dlci->gsm; u8 *dp = msg->data; u8 *fcs = dp + msg->len; /* Fill in the header */ if (gsm->encoding == GSM_BASIC_OPT) { if (msg->len < 128) *--dp = (msg->len << 1) | EA; else { *--dp = (msg->len >> 7); /* bits 7 - 15 */ *--dp = (msg->len & 127) << 1; /* bits 0 - 6 */ } } *--dp = msg->ctrl; if (gsm->initiator) *--dp = (msg->addr << 2) | CR | EA; else *--dp = (msg->addr << 2) | EA; *fcs = gsm_fcs_add_block(INIT_FCS, dp , msg->data - dp); /* Ugly protocol layering violation */ if (msg->ctrl == UI || msg->ctrl == (UI|PF)) *fcs = gsm_fcs_add_block(*fcs, msg->data, msg->len); *fcs = 0xFF - *fcs; gsm_print_packet("Q> ", msg->addr, gsm->initiator, msg->ctrl, msg->data, msg->len); /* Move the header back and adjust the length, also allow for the FCS now tacked on the end */ msg->len += (msg->data - dp) + 1; msg->data = dp; /* Add to the actual output queue */ switch (msg->ctrl & ~PF) { case UI: case UIH: if (msg->addr > 0) { list_add_tail(&msg->list, &gsm->tx_data_list); break; } fallthrough; default: list_add_tail(&msg->list, &gsm->tx_ctrl_list); break; } gsm->tx_bytes += msg->len; gsmld_write_trigger(gsm); mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); } /** * gsm_data_queue - queue a UI or UIH frame * @dlci: DLCI sending the data * @msg: message queued * * Add data to the transmit queue and try and get stuff moving * out of the mux tty if not already doing so. Take the * the gsm tx lock and dlci lock. */ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { unsigned long flags; spin_lock_irqsave(&dlci->gsm->tx_lock, flags); __gsm_data_queue(dlci, msg); spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /** * gsm_dlci_data_output - try and push data out of a DLCI * @gsm: mux * @dlci: the DLCI to pull data from * * Pull data from a DLCI and send it into the transmit queue if there * is data. Keep to the MRU of the mux. This path handles the usual tty * interface which is a byte stream with optional modem data. * * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg; u8 *dp; int h, len, size; /* for modem bits without break data */ h = ((dlci->adaption == 1) ? 0 : 1); len = kfifo_len(&dlci->fifo); if (len == 0) return 0; /* MTU/MRU count only the data bits but watch adaption mode */ if ((len + h) > dlci->mtu) len = dlci->mtu - h; size = len + h; msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (!msg) return -ENOMEM; dp = msg->data; switch (dlci->adaption) { case 1: /* Unstructured */ break; case 2: /* Unstructured with modem bits. * Always one byte as we never send inline break data */ *dp++ = (gsm_encode_modem(dlci) << 1) | EA; break; default: pr_err("%s: unsupported adaption %d\n", __func__, dlci->adaption); break; } WARN_ON(len != kfifo_out_locked(&dlci->fifo, dp, len, &dlci->lock)); /* Notify upper layer about available send space. */ tty_port_tty_wakeup(&dlci->port); __gsm_data_queue(dlci, msg); /* Bytes of data we used up */ return size; } /** * gsm_dlci_data_output_framed - try and push data out of a DLCI * @gsm: mux * @dlci: the DLCI to pull data from * * Pull data from a DLCI and send it into the transmit queue if there * is data. Keep to the MRU of the mux. This path handles framed data * queued as skbuffs to the DLCI. * * Caller must hold the tx_lock of the mux. 
*/ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg; u8 *dp; int len, size; int last = 0, first = 0; int overhead = 0; /* One byte per frame is used for B/F flags */ if (dlci->adaption == 4) overhead = 1; /* dlci->skb is locked by tx_lock */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) return 0; first = 1; } len = dlci->skb->len + overhead; /* MTU/MRU count only the data bits */ if (len > dlci->mtu) { if (dlci->adaption == 3) { /* Over long frame, bin it */ dev_kfree_skb_any(dlci->skb); dlci->skb = NULL; return 0; } len = dlci->mtu; } else last = 1; size = len + overhead; msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (msg == NULL) { skb_queue_tail(&dlci->skb_list, dlci->skb); dlci->skb = NULL; return -ENOMEM; } dp = msg->data; if (dlci->adaption == 4) { /* Interruptible framed (Packetised Data) */ /* Flag byte to carry the start/end info */ *dp++ = last << 7 | first << 6 | 1; /* EA */ len--; } memcpy(dp, dlci->skb->data, len); skb_pull(dlci->skb, len); __gsm_data_queue(dlci, msg); if (last) { dev_kfree_skb_any(dlci->skb); dlci->skb = NULL; } return size; } /** * gsm_dlci_modem_output - try and push modem status out of a DLCI * @gsm: mux * @dlci: the DLCI to pull modem status from * @brk: break signal * * Push an empty frame in to the transmit queue to update the modem status * bits and to transmit an optional break. * * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, u8 brk) { u8 *dp = NULL; struct gsm_msg *msg; int size = 0; /* for modem bits without break data */ switch (dlci->adaption) { case 1: /* Unstructured */ break; case 2: /* Unstructured with modem bits. */ size++; if (brk > 0) size++; break; default: pr_err("%s: unsupported adaption %d\n", __func__, dlci->adaption); return -EINVAL; } msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (!msg) { pr_err("%s: gsm_data_alloc error", __func__); return -ENOMEM; } dp = msg->data; switch (dlci->adaption) { case 1: /* Unstructured */ break; case 2: /* Unstructured with modem bits. */ if (brk == 0) { *dp++ = (gsm_encode_modem(dlci) << 1) | EA; } else { *dp++ = gsm_encode_modem(dlci) << 1; *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */ } break; default: /* Handled above */ break; } __gsm_data_queue(dlci, msg); return size; } /** * gsm_dlci_data_sweep - look for data to send * @gsm: the GSM mux * * Sweep the GSM mux channels in priority order looking for ones with * data to send. We could do with optimising this scan a bit. We aim * to fill the queue totally or up to TX_THRESH_HI bytes. Once we hit * TX_THRESH_LO we get called again * * FIXME: We should round robin between groups and in theory you can * renegotiate DLCI priorities with optional stuff. Needs optimising. 
*/ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) { /* Priority ordering: We should do priority with RR of the groups */ int i, len, ret = 0; bool sent; struct gsm_dlci *dlci; while (gsm->tx_bytes < TX_THRESH_HI) { for (sent = false, i = 1; i < NUM_DLCI; i++) { dlci = gsm->dlci[i]; /* skip unused or blocked channel */ if (!dlci || dlci->constipated) continue; /* skip channels with invalid state */ if (dlci->state != DLCI_OPEN) continue; /* count the sent data per adaption */ if (dlci->adaption < 3 && !dlci->net) len = gsm_dlci_data_output(gsm, dlci); else len = gsm_dlci_data_output_framed(gsm, dlci); /* on error exit */ if (len < 0) return ret; if (len > 0) { ret++; sent = true; /* The lower DLCs can starve the higher DLCs! */ break; } /* try next */ } if (!sent) break; } return ret; } /** * gsm_dlci_data_kick - transmit if possible * @dlci: DLCI to kick * * Transmit data from this DLCI if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. */ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { unsigned long flags; int sweep; if (dlci->constipated) return; spin_lock_irqsave(&dlci->gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { if (dlci->net) gsm_dlci_data_output_framed(dlci->gsm, dlci); else gsm_dlci_data_output(dlci->gsm, dlci); } if (sweep) gsm_dlci_data_sweep(dlci->gsm); spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /* * Control message processing */ /** * gsm_control_command - send a command frame to a control * @gsm: gsm channel * @cmd: the command to use * @data: data to follow encoded info * @dlen: length of data * * Encode up and queue a UI/UIH frame containing our command. */ static int gsm_control_command(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { struct gsm_msg *msg; struct gsm_dlci *dlci = gsm->dlci[0]; msg = gsm_data_alloc(gsm, 0, dlen + 2, dlci->ftype); if (msg == NULL) return -ENOMEM; msg->data[0] = (cmd << 1) | CR | EA; /* Set C/R */ msg->data[1] = (dlen << 1) | EA; memcpy(msg->data + 2, data, dlen); gsm_data_queue(dlci, msg); return 0; } /** * gsm_control_reply - send a response frame to a control * @gsm: gsm channel * @cmd: the command to use * @data: data to follow encoded info * @dlen: length of data * * Encode up and queue a UI/UIH frame containing our response. */ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { struct gsm_msg *msg; struct gsm_dlci *dlci = gsm->dlci[0]; msg = gsm_data_alloc(gsm, 0, dlen + 2, dlci->ftype); if (msg == NULL) return; msg->data[0] = (cmd & 0xFE) << 1 | EA; /* Clear C/R */ msg->data[1] = (dlen << 1) | EA; memcpy(msg->data + 2, data, dlen); gsm_data_queue(dlci, msg); } /** * gsm_process_modem - process received modem status * @tty: virtual tty bound to the DLCI * @dlci: DLCI to affect * @modem: modem bits (full EA) * @slen: number of signal octets * * Used when a modem control message or line state inline in adaption * layer 2 is processed. Sort out the local modem state and throttles */ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, u32 modem, int slen) { int mlines = 0; u8 brk = 0; int fc; /* The modem status command can either contain one octet (V.24 signals) * or two octets (V.24 signals + break signals). This is specified in * section 5.4.6.3.7 of the 07.10 mux spec. 
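 *
 * Worked example (illustrative, using the symbolic MDM_* bits handled
 * below): when two signal octets are present, the low seven bits of the
 * accumulated value carry the break information and the next seven bits the
 * V.24 signals; a signal octet of MDM_RTC | MDM_RTR | MDM_DV maps to
 * TIOCM_DSR | TIOCM_DTR | TIOCM_RTS | TIOCM_CTS | TIOCM_CD on the
 * virtual tty.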
*/ if (slen == 1) modem = modem & 0x7f; else { brk = modem & 0x7f; modem = (modem >> 7) & 0x7f; } /* Flow control/ready to communicate */ fc = (modem & MDM_FC) || !(modem & MDM_RTR); if (fc && !dlci->constipated) { /* Need to throttle our output on this device */ dlci->constipated = true; } else if (!fc && dlci->constipated) { dlci->constipated = false; gsm_dlci_data_kick(dlci); } /* Map modem bits */ if (modem & MDM_RTC) mlines |= TIOCM_DSR | TIOCM_DTR; if (modem & MDM_RTR) mlines |= TIOCM_RTS | TIOCM_CTS; if (modem & MDM_IC) mlines |= TIOCM_RI; if (modem & MDM_DV) mlines |= TIOCM_CD; /* Carrier drop -> hangup */ if (tty) { if ((mlines & TIOCM_CD) == 0 && (dlci->modem_rx & TIOCM_CD)) if (!C_CLOCAL(tty)) tty_hangup(tty); } if (brk & 0x01) tty_insert_flip_char(&dlci->port, 0, TTY_BREAK); dlci->modem_rx = mlines; wake_up_interruptible(&dlci->gsm->event); } /** * gsm_process_negotiation - process received parameters * @gsm: GSM channel * @addr: DLCI address * @cr: command/response * @params: encoded parameters from the parameter negotiation message * * Used when the response for our parameter negotiation command was * received. */ static int gsm_process_negotiation(struct gsm_mux *gsm, unsigned int addr, unsigned int cr, const struct gsm_dlci_param_bits *params) { struct gsm_dlci *dlci = gsm->dlci[addr]; unsigned int ftype, i, adaption, prio, n1, k; i = FIELD_GET(PN_I_CL_FIELD_FTYPE, params->i_cl_bits); adaption = FIELD_GET(PN_I_CL_FIELD_ADAPTION, params->i_cl_bits) + 1; prio = FIELD_GET(PN_P_FIELD_PRIO, params->p_bits); n1 = FIELD_GET(PN_N_FIELD_N1, get_unaligned_le16(&params->n_bits)); k = FIELD_GET(PN_K_FIELD_K, params->k_bits); if (n1 < MIN_MTU) { if (debug & DBG_ERRORS) pr_info("%s N1 out of range in PN\n", __func__); return -EINVAL; } switch (i) { case 0x00: ftype = UIH; break; case 0x01: ftype = UI; break; case 0x02: /* I frames are not supported */ if (debug & DBG_ERRORS) pr_info("%s unsupported I frame request in PN\n", __func__); gsm->unsupported++; return -EINVAL; default: if (debug & DBG_ERRORS) pr_info("%s i out of range in PN\n", __func__); return -EINVAL; } if (!cr && gsm->initiator) { if (adaption != dlci->adaption) { if (debug & DBG_ERRORS) pr_info("%s invalid adaption %d in PN\n", __func__, adaption); return -EINVAL; } if (prio != dlci->prio) { if (debug & DBG_ERRORS) pr_info("%s invalid priority %d in PN", __func__, prio); return -EINVAL; } if (n1 > gsm->mru || n1 > dlci->mtu) { /* We requested a frame size but the other party wants * to send larger frames. The standard allows only a * smaller response value than requested (5.4.6.3.1). */ if (debug & DBG_ERRORS) pr_info("%s invalid N1 %d in PN\n", __func__, n1); return -EINVAL; } dlci->mtu = n1; if (ftype != dlci->ftype) { if (debug & DBG_ERRORS) pr_info("%s invalid i %d in PN\n", __func__, i); return -EINVAL; } if (ftype != UI && ftype != UIH && k > dlci->k) { if (debug & DBG_ERRORS) pr_info("%s invalid k %d in PN\n", __func__, k); return -EINVAL; } dlci->k = k; } else if (cr && !gsm->initiator) { /* Only convergence layer type 1 and 2 are supported. 
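 * Convergence layer type 1 corresponds to adaption 1 (unstructured octet
 * stream) and type 2 to adaption 2 (octet stream with conveyed V.24 signal
 * states), which is why only adaption values 1 and 2 are accepted here
 * (clarifying note, not part of the original comment).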
*/ if (adaption != 1 && adaption != 2) { if (debug & DBG_ERRORS) pr_info("%s invalid adaption %d in PN\n", __func__, adaption); return -EINVAL; } dlci->adaption = adaption; if (n1 > gsm->mru) { /* Propose a smaller value */ dlci->mtu = gsm->mru; } else if (n1 > MAX_MTU) { /* Propose a smaller value */ dlci->mtu = MAX_MTU; } else { dlci->mtu = n1; } dlci->prio = prio; dlci->ftype = ftype; dlci->k = k; } else { return -EINVAL; } return 0; } /** * gsm_control_modem - modem status received * @gsm: GSM channel * @data: data following command * @clen: command length * * We have received a modem status control message. This is used by * the GSM mux protocol to pass virtual modem line status and optionally * to indicate break signals. Unpack it, convert to Linux representation * and if need be stuff a break message down the tty. */ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen) { unsigned int addr = 0; unsigned int modem = 0; struct gsm_dlci *dlci; int len = clen; int cl = clen; const u8 *dp = data; struct tty_struct *tty; len = gsm_read_ea_val(&addr, data, cl); if (len < 1) return; addr >>= 1; /* Closed port, or invalid ? */ if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL) return; dlci = gsm->dlci[addr]; /* Must be at least one byte following the EA */ if ((cl - len) < 1) return; dp += len; cl -= len; /* get the modem status */ len = gsm_read_ea_val(&modem, dp, cl); if (len < 1) return; tty = tty_port_tty_get(&dlci->port); gsm_process_modem(tty, dlci, modem, cl); if (tty) { tty_wakeup(tty); tty_kref_put(tty); } gsm_control_reply(gsm, CMD_MSC, data, clen); } /** * gsm_control_negotiation - parameter negotiation received * @gsm: GSM channel * @cr: command/response flag * @data: data following command * @dlen: data length * * We have received a parameter negotiation message. This is used by * the GSM mux protocol to configure protocol parameters for a new DLCI. */ static void gsm_control_negotiation(struct gsm_mux *gsm, unsigned int cr, const u8 *data, unsigned int dlen) { unsigned int addr; struct gsm_dlci_param_bits pn_reply; struct gsm_dlci *dlci; struct gsm_dlci_param_bits *params; if (dlen < sizeof(struct gsm_dlci_param_bits)) { gsm->open_error++; return; } /* Invalid DLCI? */ params = (struct gsm_dlci_param_bits *)data; addr = FIELD_GET(PN_D_FIELD_DLCI, params->d_bits); if (addr == 0 || addr >= NUM_DLCI || !gsm->dlci[addr]) { gsm->open_error++; return; } dlci = gsm->dlci[addr]; /* Too late for parameter negotiation? */ if ((!cr && dlci->state == DLCI_OPENING) || dlci->state == DLCI_OPEN) { gsm->open_error++; return; } /* Process the received parameters */ if (gsm_process_negotiation(gsm, addr, cr, params) != 0) { /* Negotiation failed. Close the link. */ if (debug & DBG_ERRORS) pr_info("%s PN failed\n", __func__); gsm->open_error++; gsm_dlci_close(dlci); return; } if (cr) { /* Reply command with accepted parameters. 
*/ if (gsm_encode_params(dlci, &pn_reply) == 0) gsm_control_reply(gsm, CMD_PN, (const u8 *)&pn_reply, sizeof(pn_reply)); else if (debug & DBG_ERRORS) pr_info("%s PN invalid\n", __func__); } else if (dlci->state == DLCI_CONFIGURE) { /* Proceed with link setup by sending SABM before UA */ dlci->state = DLCI_OPENING; gsm_command(gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else { if (debug & DBG_ERRORS) pr_info("%s PN in invalid state\n", __func__); gsm->open_error++; } } /** * gsm_control_rls - remote line status * @gsm: GSM channel * @data: data bytes * @clen: data length * * The modem sends us a two byte message on the control channel whenever * it wishes to send us an error state from the virtual link. Stuff * this into the uplink tty if present */ static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen) { struct tty_port *port; unsigned int addr = 0; u8 bits; int len = clen; const u8 *dp = data; while (gsm_read_ea(&addr, *dp++) == 0) { len--; if (len == 0) return; } /* Must be at least one byte following ea */ len--; if (len <= 0) return; addr >>= 1; /* Closed port, or invalid ? */ if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL) return; /* No error ? */ bits = *dp; if ((bits & 1) == 0) return; port = &gsm->dlci[addr]->port; if (bits & 2) tty_insert_flip_char(port, 0, TTY_OVERRUN); if (bits & 4) tty_insert_flip_char(port, 0, TTY_PARITY); if (bits & 8) tty_insert_flip_char(port, 0, TTY_FRAME); tty_flip_buffer_push(port); gsm_control_reply(gsm, CMD_RLS, data, clen); } static void gsm_dlci_begin_close(struct gsm_dlci *dlci); /** * gsm_control_message - DLCI 0 control processing * @gsm: our GSM mux * @command: the command EA * @data: data beyond the command/length EAs * @clen: length * * Input processor for control messages from the other end of the link. 
* Processes the incoming request and queues a response frame or an * NSC response if not supported */ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, const u8 *data, int clen) { u8 buf[1]; switch (command) { case CMD_CLD: { struct gsm_dlci *dlci = gsm->dlci[0]; /* Modem wishes to close down */ if (dlci) { dlci->dead = true; gsm->dead = true; gsm_dlci_begin_close(dlci); } } break; case CMD_TEST: /* Modem wishes to test, reply with the data */ gsm_control_reply(gsm, CMD_TEST, data, clen); break; case CMD_FCON: /* Modem can accept data again */ gsm->constipated = false; gsm_control_reply(gsm, CMD_FCON, NULL, 0); /* Kick the link in case it is idling */ gsmld_write_trigger(gsm); break; case CMD_FCOFF: /* Modem wants us to STFU */ gsm->constipated = true; gsm_control_reply(gsm, CMD_FCOFF, NULL, 0); break; case CMD_MSC: /* Out of band modem line change indicator for a DLCI */ gsm_control_modem(gsm, data, clen); break; case CMD_RLS: /* Out of band error reception for a DLCI */ gsm_control_rls(gsm, data, clen); break; case CMD_PSC: /* Modem wishes to enter power saving state */ gsm_control_reply(gsm, CMD_PSC, NULL, 0); break; /* Optional commands */ case CMD_PN: /* Modem sends a parameter negotiation command */ gsm_control_negotiation(gsm, 1, data, clen); break; /* Optional unsupported commands */ case CMD_RPN: /* Remote port negotiation */ case CMD_SNC: /* Service negotiation command */ gsm->unsupported++; fallthrough; default: /* Reply to bad commands with an NSC */ buf[0] = command; gsm_control_reply(gsm, CMD_NSC, buf, 1); break; } } /** * gsm_control_response - process a response to our control * @gsm: our GSM mux * @command: the command (response) EA * @data: data beyond the command/length EA * @clen: length * * Process a response to an outstanding command. We only allow a single * control message in flight so this is fairly easy. All the clean up * is done by the caller, we just update the fields, flag it as done * and return */ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command, const u8 *data, int clen) { struct gsm_control *ctrl; struct gsm_dlci *dlci; unsigned long flags; spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; dlci = gsm->dlci[0]; command |= 1; /* Does the reply match our command */ if (ctrl != NULL && (command == ctrl->cmd || command == CMD_NSC)) { /* Our command was replied to, kill the retry timer */ del_timer(&gsm->t2_timer); gsm->pending_cmd = NULL; /* Rejected by the other end */ if (command == CMD_NSC) ctrl->error = -EOPNOTSUPP; ctrl->done = 1; wake_up(&gsm->event); /* Or did we receive the PN response to our PN command */ } else if (command == CMD_PN) { gsm_control_negotiation(gsm, 0, data, clen); /* Or did we receive the TEST response to our TEST command */ } else if (command == CMD_TEST && clen == 1 && *data == gsm->ka_num) { gsm->ka_retries = -1; /* trigger new keep-alive message */ if (dlci && !dlci->dead) mod_timer(&gsm->ka_timer, jiffies + gsm->keep_alive * HZ / 100); } spin_unlock_irqrestore(&gsm->control_lock, flags); } /** * gsm_control_keep_alive - check timeout or start keep-alive * @t: timer contained in our gsm object * * Called off the keep-alive timer expiry signaling that our link * partner is not responding anymore. Link will be closed. * This is also called to startup our timer. 
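 *
 * Illustrative summary of the mechanism implemented below: a CMD_TEST
 * command carrying the current ka_num is sent on DLCI 0 and retried every
 * t2 hundredths of a second, up to n2 times; a matching CMD_TEST response
 * (see gsm_control_response()) re-arms the timer for the next keep-alive
 * period, while running out of retries closes DLCI 0.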
*/ static void gsm_control_keep_alive(struct timer_list *t) { struct gsm_mux *gsm = from_timer(gsm, t, ka_timer); unsigned long flags; spin_lock_irqsave(&gsm->control_lock, flags); if (gsm->ka_num && gsm->ka_retries == 0) { /* Keep-alive expired -> close the link */ if (debug & DBG_ERRORS) pr_debug("%s keep-alive timed out\n", __func__); spin_unlock_irqrestore(&gsm->control_lock, flags); if (gsm->dlci[0]) gsm_dlci_begin_close(gsm->dlci[0]); return; } else if (gsm->keep_alive && gsm->dlci[0] && !gsm->dlci[0]->dead) { if (gsm->ka_retries > 0) { /* T2 expired for keep-alive -> resend */ gsm->ka_retries--; } else { /* Start keep-alive timer */ gsm->ka_num++; if (!gsm->ka_num) gsm->ka_num++; gsm->ka_retries = (signed int)gsm->n2; } gsm_control_command(gsm, CMD_TEST, &gsm->ka_num, sizeof(gsm->ka_num)); mod_timer(&gsm->ka_timer, jiffies + gsm->t2 * HZ / 100); } spin_unlock_irqrestore(&gsm->control_lock, flags); } /** * gsm_control_transmit - send control packet * @gsm: gsm mux * @ctrl: frame to send * * Send out a pending control command (called under control lock) */ static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl) { gsm_control_command(gsm, ctrl->cmd, ctrl->data, ctrl->len); } /** * gsm_control_retransmit - retransmit a control frame * @t: timer contained in our gsm object * * Called off the T2 timer expiry in order to retransmit control frames * that have been lost in the system somewhere. The control_lock protects * us from colliding with another sender or a receive completion event. * In that situation the timer may still occur in a small window but * gsm->pending_cmd will be NULL and we just let the timer expire. */ static void gsm_control_retransmit(struct timer_list *t) { struct gsm_mux *gsm = from_timer(gsm, t, t2_timer); struct gsm_control *ctrl; unsigned long flags; spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; if (ctrl) { if (gsm->cretries == 0 || !gsm->dlci[0] || gsm->dlci[0]->dead) { gsm->pending_cmd = NULL; ctrl->error = -ETIMEDOUT; ctrl->done = 1; spin_unlock_irqrestore(&gsm->control_lock, flags); wake_up(&gsm->event); return; } gsm->cretries--; gsm_control_transmit(gsm, ctrl); mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); } spin_unlock_irqrestore(&gsm->control_lock, flags); } /** * gsm_control_send - send a control frame on DLCI 0 * @gsm: the GSM channel * @command: command to send including CR bit * @data: bytes of data (must be kmalloced) * @clen: length of the block to send * * Queue and dispatch a control command. Only one command can be * active at a time. In theory more can be outstanding but the matching * gets really complicated so for now stick to one outstanding. 
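 *
 * Typical usage inside this driver (illustrative sketch only):
 *
 *	ctrl = gsm_control_send(gsm, CMD_TEST, data, len);
 *	if (ctrl)
 *		err = gsm_control_wait(gsm, ctrl);
 *
 * gsm_control_wait() frees the control structure and returns 0 on success
 * or a negative error if the peer rejected or ignored the request.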
*/ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, unsigned int command, u8 *data, int clen) { struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), GFP_ATOMIC); unsigned long flags; if (ctrl == NULL) return NULL; retry: wait_event(gsm->event, gsm->pending_cmd == NULL); spin_lock_irqsave(&gsm->control_lock, flags); if (gsm->pending_cmd != NULL) { spin_unlock_irqrestore(&gsm->control_lock, flags); goto retry; } ctrl->cmd = command; ctrl->data = data; ctrl->len = clen; gsm->pending_cmd = ctrl; /* If DLCI0 is in ADM mode skip retries, it won't respond */ if (gsm->dlci[0]->mode == DLCI_MODE_ADM) gsm->cretries = 0; else gsm->cretries = gsm->n2; mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); gsm_control_transmit(gsm, ctrl); spin_unlock_irqrestore(&gsm->control_lock, flags); return ctrl; } /** * gsm_control_wait - wait for a control to finish * @gsm: GSM mux * @control: control we are waiting on * * Waits for the control to complete or time out. Frees any used * resources and returns 0 for success, or an error if the remote * rejected or ignored the request. */ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) { int err; wait_event(gsm->event, control->done == 1); err = control->error; kfree(control); return err; } /* * DLCI level handling: Needs krefs */ /* * State transitions and timers */ /** * gsm_dlci_close - a DLCI has closed * @dlci: DLCI that closed * * Perform processing when moving a DLCI into closed state. If there * is an attached tty this is hung up */ static void gsm_dlci_close(struct gsm_dlci *dlci) { del_timer(&dlci->t1); if (debug & DBG_ERRORS) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; /* Prevent us from sending data before the link is up again */ dlci->constipated = true; if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); gsm_dlci_clear_queues(dlci->gsm, dlci); /* Ensure that gsmtty_open() can return. */ tty_port_set_initialized(&dlci->port, false); wake_up_interruptible(&dlci->port.open_wait); } else { del_timer(&dlci->gsm->ka_timer); dlci->gsm->dead = true; } /* A DLCI 0 close is a MUX termination so we need to kick that back to userspace somehow */ gsm_dlci_data_kick(dlci); wake_up_all(&dlci->gsm->event); } /** * gsm_dlci_open - a DLCI has opened * @dlci: DLCI that opened * * Perform processing when moving a DLCI into open state. */ static void gsm_dlci_open(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; /* Note that SABM UA .. SABM UA first UA lost can mean that we go open -> open */ del_timer(&dlci->t1); /* This will let a tty open continue */ dlci->state = DLCI_OPEN; dlci->constipated = false; if (debug & DBG_ERRORS) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Send current modem state */ if (dlci->addr) { gsm_modem_update(dlci, 0); } else { /* Start keep-alive control */ gsm->ka_num = 0; gsm->ka_retries = -1; mod_timer(&gsm->ka_timer, jiffies + gsm->keep_alive * HZ / 100); } gsm_dlci_data_kick(dlci); wake_up(&dlci->gsm->event); } /** * gsm_dlci_negotiate - start parameter negotiation * @dlci: DLCI to open * * Starts the parameter negotiation for the new DLCI. This needs to be done * before the DLCI initialized the channel via SABM. */ static int gsm_dlci_negotiate(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; struct gsm_dlci_param_bits params; int ret; ret = gsm_encode_params(dlci, &params); if (ret != 0) return ret; /* We cannot asynchronous wait for the command response with * gsm_command() and gsm_control_wait() at this point. 
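 * The command is therefore only queued here; retransmission and the
 * reaction to the peer's reply are driven by the DLCI_CONFIGURE handling in
 * gsm_dlci_t1() and by gsm_control_negotiation() (clarifying note, not part
 * of the original comment).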
*/ ret = gsm_control_command(gsm, CMD_PN, (const u8 *)&params, sizeof(params)); return ret; } /** * gsm_dlci_t1 - T1 timer expiry * @t: timer contained in the DLCI that opened * * The T1 timer handles retransmits of control frames (essentially of * SABM and DISC). We resend the command until the retry count runs out * in which case an opening port goes back to closed and a closing port * is simply put into closed state (any further frames from the other * end will get a DM response) * * Some control dlci can stay in ADM mode with other dlci working just * fine. In that case we can just keep the control dlci open after the * DLCI_OPENING retries time out. */ static void gsm_dlci_t1(struct timer_list *t) { struct gsm_dlci *dlci = from_timer(dlci, t, t1); struct gsm_mux *gsm = dlci->gsm; switch (dlci->state) { case DLCI_CONFIGURE: if (dlci->retries && gsm_dlci_negotiate(dlci) == 0) { dlci->retries--; mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else { gsm->open_error++; gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; case DLCI_OPENING: if (dlci->retries) { dlci->retries--; gsm_command(dlci->gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else if (!dlci->addr && gsm->control == (DM | PF)) { if (debug & DBG_ERRORS) pr_info("DLCI %d opening in ADM mode.\n", dlci->addr); dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { gsm->open_error++; gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; case DLCI_CLOSING: if (dlci->retries) { dlci->retries--; gsm_command(dlci->gsm, dlci->addr, DISC|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else gsm_dlci_close(dlci); break; default: pr_debug("%s: unhandled state: %d\n", __func__, dlci->state); break; } } /** * gsm_dlci_begin_open - start channel open procedure * @dlci: DLCI to open * * Commence opening a DLCI from the Linux side. We issue SABM messages * to the modem which should then reply with a UA or ADM, at which point * we will move into open state. Opening is done asynchronously with retry * running off timers and the responses. * Parameter negotiation is performed before SABM if required. */ static void gsm_dlci_begin_open(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci ? dlci->gsm : NULL; bool need_pn = false; if (!gsm) return; if (dlci->addr != 0) { if (gsm->adaption != 1 || gsm->adaption != dlci->adaption) need_pn = true; if (dlci->prio != (roundup(dlci->addr + 1, 8) - 1)) need_pn = true; if (gsm->ftype != dlci->ftype) need_pn = true; } switch (dlci->state) { case DLCI_CLOSED: case DLCI_WAITING_CONFIG: case DLCI_CLOSING: dlci->retries = gsm->n2; if (!need_pn) { dlci->state = DLCI_OPENING; gsm_command(gsm, dlci->addr, SABM|PF); } else { /* Configure DLCI before setup */ dlci->state = DLCI_CONFIGURE; if (gsm_dlci_negotiate(dlci) != 0) { gsm_dlci_close(dlci); return; } } mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); break; default: break; } } /** * gsm_dlci_set_opening - change state to opening * @dlci: DLCI to open * * Change internal state to wait for DLCI open from initiator side. * We set off timers and responses upon reception of an SABM. */ static void gsm_dlci_set_opening(struct gsm_dlci *dlci) { switch (dlci->state) { case DLCI_CLOSED: case DLCI_WAITING_CONFIG: case DLCI_CLOSING: dlci->state = DLCI_OPENING; break; default: break; } } /** * gsm_dlci_set_wait_config - wait for channel configuration * @dlci: DLCI to configure * * Wait for a DLCI configuration from the application. 
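 *
 * Note (inferred from gsm_dlci_config() elsewhere in this file): a DLCI
 * left in DLCI_WAITING_CONFIG is moved on towards DLCI_OPENING once the
 * application has supplied its configuration.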
*/ static void gsm_dlci_set_wait_config(struct gsm_dlci *dlci) { switch (dlci->state) { case DLCI_CLOSED: case DLCI_CLOSING: dlci->state = DLCI_WAITING_CONFIG; break; default: break; } } /** * gsm_dlci_begin_close - start channel open procedure * @dlci: DLCI to open * * Commence closing a DLCI from the Linux side. We issue DISC messages * to the modem which should then reply with a UA, at which point we * will move into closed state. Closing is done asynchronously with retry * off timers. We may also receive a DM reply from the other end which * indicates the channel was already closed. */ static void gsm_dlci_begin_close(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; if (dlci->state == DLCI_CLOSED || dlci->state == DLCI_CLOSING) return; dlci->retries = gsm->n2; dlci->state = DLCI_CLOSING; gsm_command(dlci->gsm, dlci->addr, DISC|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); wake_up_interruptible(&gsm->event); } /** * gsm_dlci_data - data arrived * @dlci: channel * @data: block of bytes received * @clen: length of received block * * A UI or UIH frame has arrived which contains data for a channel * other than the control channel. If the relevant virtual tty is * open we shovel the bits down it, if not we drop them. */ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) { /* krefs .. */ struct tty_port *port = &dlci->port; struct tty_struct *tty; unsigned int modem = 0; int len; if (debug & DBG_TTY) pr_debug("%d bytes for tty\n", clen); switch (dlci->adaption) { /* Unsupported types */ case 4: /* Packetised interruptible data */ break; case 3: /* Packetised uininterruptible voice/data */ break; case 2: /* Asynchronous serial with line state in each frame */ len = gsm_read_ea_val(&modem, data, clen); if (len < 1) return; tty = tty_port_tty_get(port); if (tty) { gsm_process_modem(tty, dlci, modem, len); tty_wakeup(tty); tty_kref_put(tty); } /* Skip processed modem data */ data += len; clen -= len; fallthrough; case 1: /* Line state will go via DLCI 0 controls only */ default: tty_insert_flip_string(port, data, clen); tty_flip_buffer_push(port); } } /** * gsm_dlci_command - data arrived on control channel * @dlci: channel * @data: block of bytes received * @len: length of received block * * A UI or UIH frame has arrived which contains data for DLCI 0 the * control channel. This should contain a command EA followed by * control data bytes. The command EA contains a command/response bit * and we divide up the work accordingly. */ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) { /* See what command is involved */ unsigned int command = 0; unsigned int clen = 0; unsigned int dlen; /* read the command */ dlen = gsm_read_ea_val(&command, data, len); len -= dlen; data += dlen; /* read any control data */ dlen = gsm_read_ea_val(&clen, data, len); len -= dlen; data += dlen; /* Malformed command? */ if (clen > len) { dlci->gsm->malformed++; return; } if (command & 1) gsm_control_message(dlci->gsm, command, data, clen); else gsm_control_response(dlci->gsm, command, data, clen); } /** * gsm_kick_timer - transmit if possible * @t: timer contained in our gsm object * * Transmit data from DLCIs if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. 
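 *
 * Note (inferred from __gsm_data_queue() above): the timer is re-armed each
 * time a frame is queued, with a period of ten T1 intervals, so it only
 * fires after the transmit path has been left alone for a while.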
*/ static void gsm_kick_timer(struct timer_list *t) { struct gsm_mux *gsm = from_timer(gsm, t, kick_timer); unsigned long flags; int sent = 0; spin_lock_irqsave(&gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ if (gsm->tx_bytes < TX_THRESH_LO) sent = gsm_dlci_data_sweep(gsm); spin_unlock_irqrestore(&gsm->tx_lock, flags); if (sent && debug & DBG_DATA) pr_info("%s TX queue stalled\n", __func__); } /** * gsm_dlci_copy_config_values - copy DLCI configuration * @dlci: source DLCI * @dc: configuration structure to fill */ static void gsm_dlci_copy_config_values(struct gsm_dlci *dlci, struct gsm_dlci_config *dc) { memset(dc, 0, sizeof(*dc)); dc->channel = (u32)dlci->addr; dc->adaption = (u32)dlci->adaption; dc->mtu = (u32)dlci->mtu; dc->priority = (u32)dlci->prio; if (dlci->ftype == UIH) dc->i = 1; else dc->i = 2; dc->k = (u32)dlci->k; } /** * gsm_dlci_config - configure DLCI from configuration * @dlci: DLCI to configure * @dc: DLCI configuration * @open: open DLCI after configuration? */ static int gsm_dlci_config(struct gsm_dlci *dlci, struct gsm_dlci_config *dc, int open) { struct gsm_mux *gsm; bool need_restart = false; bool need_open = false; unsigned int i; /* * Check that userspace doesn't put stuff in here to prevent breakages * in the future. */ for (i = 0; i < ARRAY_SIZE(dc->reserved); i++) if (dc->reserved[i]) return -EINVAL; if (!dlci) return -EINVAL; gsm = dlci->gsm; /* Stuff we don't support yet - I frame transport */ if (dc->adaption != 1 && dc->adaption != 2) return -EOPNOTSUPP; if (dc->mtu > MAX_MTU || dc->mtu < MIN_MTU || dc->mtu > gsm->mru) return -EINVAL; if (dc->priority >= 64) return -EINVAL; if (dc->i == 0 || dc->i > 2) /* UIH and UI only */ return -EINVAL; if (dc->k > 7) return -EINVAL; if (dc->flags & ~GSM_FL_RESTART) /* allow future extensions */ return -EINVAL; /* * See what is needed for reconfiguration */ /* Framing fields */ if (dc->adaption != dlci->adaption) need_restart = true; if (dc->mtu != dlci->mtu) need_restart = true; if (dc->i != dlci->ftype) need_restart = true; /* Requires care */ if (dc->priority != dlci->prio) need_restart = true; if (dc->flags & GSM_FL_RESTART) need_restart = true; if ((open && gsm->wait_config) || need_restart) need_open = true; if (dlci->state == DLCI_WAITING_CONFIG) { need_restart = false; need_open = true; } /* * Close down what is needed, restart and initiate the new * configuration. */ if (need_restart) { gsm_dlci_begin_close(dlci); wait_event_interruptible(gsm->event, dlci->state == DLCI_CLOSED); if (signal_pending(current)) return -EINTR; } /* * Setup the new configuration values */ dlci->adaption = (int)dc->adaption; if (dc->mtu) dlci->mtu = (unsigned int)dc->mtu; else dlci->mtu = gsm->mtu; if (dc->priority) dlci->prio = (u8)dc->priority; else dlci->prio = roundup(dlci->addr + 1, 8) - 1; if (dc->i == 1) dlci->ftype = UIH; else if (dc->i == 2) dlci->ftype = UI; if (dc->k) dlci->k = (u8)dc->k; else dlci->k = gsm->k; if (need_open) { if (gsm->initiator) gsm_dlci_begin_open(dlci); else gsm_dlci_set_opening(dlci); } return 0; } /* * Allocate/Free DLCI channels */ /** * gsm_dlci_alloc - allocate a DLCI * @gsm: GSM mux * @addr: address of the DLCI * * Allocate and install a new DLCI object into the GSM mux. 
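 *
 * Worked example of the defaults set below (illustrative): the default
 * priority is roundup(addr + 1, 8) - 1, so DLCIs 1-7 get priority 7,
 * DLCIs 8-15 get priority 15, and so on; DLCI 0 always uses priority 0.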
* * FIXME: review locking races */ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) { struct gsm_dlci *dlci = kzalloc(sizeof(struct gsm_dlci), GFP_ATOMIC); if (dlci == NULL) return NULL; spin_lock_init(&dlci->lock); mutex_init(&dlci->mutex); if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) { kfree(dlci); return NULL; } skb_queue_head_init(&dlci->skb_list); timer_setup(&dlci->t1, gsm_dlci_t1, 0); tty_port_init(&dlci->port); dlci->port.ops = &gsm_port_ops; dlci->gsm = gsm; dlci->addr = addr; dlci->adaption = gsm->adaption; dlci->mtu = gsm->mtu; if (addr == 0) dlci->prio = 0; else dlci->prio = roundup(addr + 1, 8) - 1; dlci->ftype = gsm->ftype; dlci->k = gsm->k; dlci->state = DLCI_CLOSED; if (addr) { dlci->data = gsm_dlci_data; /* Prevent us from sending data before the link is up */ dlci->constipated = true; } else { dlci->data = gsm_dlci_command; } gsm->dlci[addr] = dlci; return dlci; } /** * gsm_dlci_free - free DLCI * @port: tty port for DLCI to free * * Free up a DLCI. * * Can sleep. */ static void gsm_dlci_free(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); timer_shutdown_sync(&dlci->t1); dlci->gsm->dlci[dlci->addr] = NULL; kfifo_free(&dlci->fifo); while ((dlci->skb = skb_dequeue(&dlci->skb_list))) dev_kfree_skb(dlci->skb); kfree(dlci); } static inline void dlci_get(struct gsm_dlci *dlci) { tty_port_get(&dlci->port); } static inline void dlci_put(struct gsm_dlci *dlci) { tty_port_put(&dlci->port); } static void gsm_destroy_network(struct gsm_dlci *dlci); /** * gsm_dlci_release - release DLCI * @dlci: DLCI to destroy * * Release a DLCI. Actual free is deferred until either * mux is closed or tty is closed - whichever is last. * * Can sleep. */ static void gsm_dlci_release(struct gsm_dlci *dlci) { struct tty_struct *tty = tty_port_tty_get(&dlci->port); if (tty) { mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); /* We cannot use tty_hangup() because in tty_kref_put() the tty * driver assumes that the hangup queue is free and reuses it to * queue release_one_tty() -> NULL pointer panic in * process_one_work(). */ tty_vhangup(tty); tty_port_tty_set(&dlci->port, NULL); tty_kref_put(tty); } dlci->state = DLCI_CLOSED; dlci_put(dlci); } /* * LAPBish link layer logic */ /** * gsm_queue - a GSM frame is ready to process * @gsm: pointer to our gsm mux * * At this point in time a frame has arrived and been demangled from * the line encoding. All the differences between the encodings have * been handled below us and the frame is unpacked into the structures. * The fcs holds the header FCS but any data FCS must be added here. */ static void gsm_queue(struct gsm_mux *gsm) { struct gsm_dlci *dlci; u8 cr; int address; if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; if (debug & DBG_DATA) pr_debug("BAD FCS %02x\n", gsm->fcs); return; } address = gsm->address >> 1; if (address >= NUM_DLCI) goto invalid; cr = gsm->address & 1; /* C/R bit */ cr ^= gsm->initiator ? 
0 : 1; /* Flip so 1 always means command */ gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len); dlci = gsm->dlci[address]; switch (gsm->control) { case SABM|PF: if (cr == 1) { gsm->open_error++; goto invalid; } if (dlci == NULL) dlci = gsm_dlci_alloc(gsm, address); if (dlci == NULL) { gsm->open_error++; return; } if (dlci->dead) gsm_response(gsm, address, DM|PF); else { gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); } break; case DISC|PF: if (cr == 1) goto invalid; if (dlci == NULL || dlci->state == DLCI_CLOSED) { gsm_response(gsm, address, DM|PF); return; } /* Real close complete */ gsm_response(gsm, address, UA|PF); gsm_dlci_close(dlci); break; case UA|PF: if (cr == 0 || dlci == NULL) break; switch (dlci->state) { case DLCI_CLOSING: gsm_dlci_close(dlci); break; case DLCI_OPENING: gsm_dlci_open(dlci); break; default: pr_debug("%s: unhandled state: %d\n", __func__, dlci->state); break; } break; case DM: /* DM can be valid unsolicited */ case DM|PF: if (cr) goto invalid; if (dlci == NULL) return; gsm_dlci_close(dlci); break; case UI: case UI|PF: case UIH: case UIH|PF: if (dlci == NULL || dlci->state != DLCI_OPEN) { gsm_response(gsm, address, DM|PF); return; } dlci->data(dlci, gsm->buf, gsm->len); break; default: goto invalid; } return; invalid: gsm->malformed++; return; } /** * gsm0_receive_state_check_and_fix - check and correct receive state * @gsm: gsm data for this ldisc instance * * Ensures that the current receive state is valid for basic option mode. */ static void gsm0_receive_state_check_and_fix(struct gsm_mux *gsm) { switch (gsm->state) { case GSM_SEARCH: case GSM0_ADDRESS: case GSM0_CONTROL: case GSM0_LEN0: case GSM0_LEN1: case GSM0_DATA: case GSM0_FCS: case GSM0_SSOF: break; default: gsm->state = GSM_SEARCH; break; } } /** * gsm0_receive - perform processing for non-transparency * @gsm: gsm data for this ldisc instance * @c: character * * Receive bytes in gsm mode 0 */ static void gsm0_receive(struct gsm_mux *gsm, u8 c) { unsigned int len; gsm0_receive_state_check_and_fix(gsm); switch (gsm->state) { case GSM_SEARCH: /* SOF marker */ if (c == GSM0_SOF) { gsm->state = GSM0_ADDRESS; gsm->address = 0; gsm->len = 0; gsm->fcs = INIT_FCS; } break; case GSM0_ADDRESS: /* Address EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) gsm->state = GSM0_CONTROL; break; case GSM0_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; gsm->state = GSM0_LEN0; break; case GSM0_LEN0: /* Length EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->len, c)) { if (gsm->len > gsm->mru) { gsm->bad_size++; gsm->state = GSM_SEARCH; break; } gsm->count = 0; if (!gsm->len) gsm->state = GSM0_FCS; else gsm->state = GSM0_DATA; break; } gsm->state = GSM0_LEN1; break; case GSM0_LEN1: gsm->fcs = gsm_fcs_add(gsm->fcs, c); len = c; gsm->len |= len << 7; if (gsm->len > gsm->mru) { gsm->bad_size++; gsm->state = GSM_SEARCH; break; } gsm->count = 0; if (!gsm->len) gsm->state = GSM0_FCS; else gsm->state = GSM0_DATA; break; case GSM0_DATA: /* Data */ gsm->buf[gsm->count++] = c; if (gsm->count >= MAX_MRU) { gsm->bad_size++; gsm->state = GSM_SEARCH; } else if (gsm->count >= gsm->len) { /* Calculate final FCS for UI frames over all data */ if ((gsm->control & ~PF) != UIH) { gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->count); } gsm->state = GSM0_FCS; } break; case GSM0_FCS: /* FCS follows the packet */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->state = GSM0_SSOF; break; case GSM0_SSOF: gsm->state = GSM_SEARCH; if (c == 
GSM0_SOF) gsm_queue(gsm); else gsm->bad_size++; break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); break; } } /** * gsm1_receive_state_check_and_fix - check and correct receive state * @gsm: gsm data for this ldisc instance * * Ensures that the current receive state is valid for advanced option mode. */ static void gsm1_receive_state_check_and_fix(struct gsm_mux *gsm) { switch (gsm->state) { case GSM_SEARCH: case GSM1_START: case GSM1_ADDRESS: case GSM1_CONTROL: case GSM1_DATA: case GSM1_OVERRUN: break; default: gsm->state = GSM_SEARCH; break; } } /** * gsm1_receive - perform processing for non-transparency * @gsm: gsm data for this ldisc instance * @c: character * * Receive bytes in mode 1 (Advanced option) */ static void gsm1_receive(struct gsm_mux *gsm, u8 c) { gsm1_receive_state_check_and_fix(gsm); /* handle XON/XOFF */ if ((c & ISO_IEC_646_MASK) == XON) { gsm->constipated = true; return; } else if ((c & ISO_IEC_646_MASK) == XOFF) { gsm->constipated = false; /* Kick the link in case it is idling */ gsmld_write_trigger(gsm); return; } if (c == GSM1_SOF) { /* EOF is only valid in frame if we have got to the data state */ if (gsm->state == GSM1_DATA) { if (gsm->count < 1) { /* Missing FSC */ gsm->malformed++; gsm->state = GSM1_START; return; } /* Remove the FCS from data */ gsm->count--; if ((gsm->control & ~PF) != UIH) { /* Calculate final FCS for UI frames over all * data but FCS */ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->count); } /* Add the FCS itself to test against GOOD_FCS */ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); gsm->state = GSM1_START; return; } /* Any partial frame was a runt so go back to start */ if (gsm->state != GSM1_START) { if (gsm->state != GSM_SEARCH) gsm->malformed++; gsm->state = GSM1_START; } /* A SOF in GSM_START means we are still reading idling or framing bytes */ return; } if (c == GSM1_ESCAPE) { gsm->escape = true; return; } /* Only an unescaped SOF gets us out of GSM search */ if (gsm->state == GSM_SEARCH) return; if (gsm->escape) { c ^= GSM1_ESCAPE_BITS; gsm->escape = false; } switch (gsm->state) { case GSM1_START: /* First byte after SOF */ gsm->address = 0; gsm->state = GSM1_ADDRESS; gsm->fcs = INIT_FCS; fallthrough; case GSM1_ADDRESS: /* Address continuation */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) gsm->state = GSM1_CONTROL; break; case GSM1_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; gsm->count = 0; gsm->state = GSM1_DATA; break; case GSM1_DATA: /* Data */ if (gsm->count > gsm->mru || gsm->count > MAX_MRU) { /* Allow one for the FCS */ gsm->state = GSM1_OVERRUN; gsm->bad_size++; } else gsm->buf[gsm->count++] = c; break; case GSM1_OVERRUN: /* Over-long - eg a dropped SOF */ break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); break; } } /** * gsm_error - handle tty error * @gsm: ldisc data * * Handle an error in the receipt of data for a frame. Currently we just * go back to hunting for a SOF. * * FIXME: better diagnostics ? */ static void gsm_error(struct gsm_mux *gsm) { gsm->state = GSM_SEARCH; gsm->io_error++; } /** * gsm_cleanup_mux - generic GSM protocol cleanup * @gsm: our mux * @disc: disconnect link? * * Clean up the bits of the mux which are the same for all framing * protocols. Remove the mux from the mux table, stop all the timers * and then shut down each device hanging up the channels as we go. 
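 *
 * Note (reflecting the release loop below): the DLCIs are torn down from
 * the highest address down to 0, so the control channel is the last one to
 * go away.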
*/ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) { int i; struct gsm_dlci *dlci; struct gsm_msg *txq, *ntxq; gsm->dead = true; mutex_lock(&gsm->mutex); dlci = gsm->dlci[0]; if (dlci) { if (disc && dlci->state != DLCI_CLOSED) { gsm_dlci_begin_close(dlci); wait_event(gsm->event, dlci->state == DLCI_CLOSED); } dlci->dead = true; } /* Finish outstanding timers, making sure they are done */ del_timer_sync(&gsm->kick_timer); del_timer_sync(&gsm->t2_timer); del_timer_sync(&gsm->ka_timer); /* Finish writing to ldisc */ flush_work(&gsm->tx_work); /* Free up any link layer users and finally the control channel */ if (gsm->has_devices) { gsm_unregister_devices(gsm_tty_driver, gsm->num); gsm->has_devices = false; } for (i = NUM_DLCI - 1; i >= 0; i--) if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); /* Now wipe the queues */ tty_ldisc_flush(gsm->tty); list_for_each_entry_safe(txq, ntxq, &gsm->tx_ctrl_list, list) kfree(txq); INIT_LIST_HEAD(&gsm->tx_ctrl_list); list_for_each_entry_safe(txq, ntxq, &gsm->tx_data_list, list) kfree(txq); INIT_LIST_HEAD(&gsm->tx_data_list); } /** * gsm_activate_mux - generic GSM setup * @gsm: our mux * * Set up the bits of the mux which are the same for all framing * protocols. Add the mux to the mux table so it can be opened and * finally kick off connecting to DLCI 0 on the modem. */ static int gsm_activate_mux(struct gsm_mux *gsm) { struct gsm_dlci *dlci; int ret; dlci = gsm_dlci_alloc(gsm, 0); if (dlci == NULL) return -ENOMEM; if (gsm->encoding == GSM_BASIC_OPT) gsm->receive = gsm0_receive; else gsm->receive = gsm1_receive; ret = gsm_register_devices(gsm_tty_driver, gsm->num); if (ret) return ret; gsm->has_devices = true; gsm->dead = false; /* Tty opens are now permissible */ return 0; } /** * gsm_free_mux - free up a mux * @gsm: mux to free * * Dispose of allocated resources for a dead mux */ static void gsm_free_mux(struct gsm_mux *gsm) { int i; for (i = 0; i < MAX_MUX; i++) { if (gsm == gsm_mux[i]) { gsm_mux[i] = NULL; break; } } mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); } /** * gsm_free_muxr - free up a mux * @ref: kreference to the mux to free * * Dispose of allocated resources for a dead mux */ static void gsm_free_muxr(struct kref *ref) { struct gsm_mux *gsm = container_of(ref, struct gsm_mux, ref); gsm_free_mux(gsm); } static inline void mux_get(struct gsm_mux *gsm) { unsigned long flags; spin_lock_irqsave(&gsm_mux_lock, flags); kref_get(&gsm->ref); spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline void mux_put(struct gsm_mux *gsm) { unsigned long flags; spin_lock_irqsave(&gsm_mux_lock, flags); kref_put(&gsm->ref, gsm_free_muxr); spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline unsigned int mux_num_to_base(struct gsm_mux *gsm) { return gsm->num * NUM_DLCI; } static inline unsigned int mux_line_to_num(unsigned int line) { return line / NUM_DLCI; } /** * gsm_alloc_mux - allocate a mux * * Creates a new mux ready for activation. 
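 *
 * Illustrative note on the defaults chosen below: the mux starts in the
 * advanced option (GSM_ADV_OPT) with UIH framing, adaption 1 and an MRU/MTU
 * of 64 bytes; T1 and T2 are programmed in hundredths of a second, as can
 * be seen from the "t1 * HZ / 100" timer arithmetic used throughout this
 * file.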
*/ static struct gsm_mux *gsm_alloc_mux(void) { int i; struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL); if (gsm == NULL) return NULL; gsm->buf = kmalloc(MAX_MRU + 1, GFP_KERNEL); if (gsm->buf == NULL) { kfree(gsm); return NULL; } gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL); if (gsm->txframe == NULL) { kfree(gsm->buf); kfree(gsm); return NULL; } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); timer_setup(&gsm->ka_timer, gsm_control_keep_alive, 0); INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; gsm->t3 = T3; gsm->n2 = N2; gsm->k = K; gsm->ftype = UIH; gsm->adaption = 1; gsm->encoding = GSM_ADV_OPT; gsm->mru = 64; /* Default to encoding 1 so these should be 64 */ gsm->mtu = 64; gsm->dead = true; /* Avoid early tty opens */ gsm->wait_config = false; /* Disabled */ gsm->keep_alive = 0; /* Disabled */ /* Store the instance to the mux array or abort if no space is * available. */ spin_lock(&gsm_mux_lock); for (i = 0; i < MAX_MUX; i++) { if (!gsm_mux[i]) { gsm_mux[i] = gsm; gsm->num = i; break; } } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); return NULL; } return gsm; } static void gsm_copy_config_values(struct gsm_mux *gsm, struct gsm_config *c) { memset(c, 0, sizeof(*c)); c->adaption = gsm->adaption; c->encapsulation = gsm->encoding; c->initiator = gsm->initiator; c->t1 = gsm->t1; c->t2 = gsm->t2; c->t3 = gsm->t3; c->n2 = gsm->n2; if (gsm->ftype == UIH) c->i = 1; else c->i = 2; pr_debug("Ftype %d i %d\n", gsm->ftype, c->i); c->mru = gsm->mru; c->mtu = gsm->mtu; c->k = gsm->k; } static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) { int need_close = 0; int need_restart = 0; /* Stuff we don't support yet - UI or I frame transport */ if (c->adaption != 1 && c->adaption != 2) return -EOPNOTSUPP; /* Check the MRU/MTU range looks sane */ if (c->mru < MIN_MTU || c->mtu < MIN_MTU) return -EINVAL; if (c->mru > MAX_MRU || c->mtu > MAX_MTU) return -EINVAL; if (c->t3 > MAX_T3) return -EINVAL; if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; if (c->initiator > 1) return -EINVAL; if (c->k > MAX_WINDOW_SIZE) return -EINVAL; if (c->i == 0 || c->i > 2) /* UIH and UI only */ return -EINVAL; /* * See what is needed for reconfiguration */ /* Timing fields */ if (c->t1 != 0 && c->t1 != gsm->t1) need_restart = 1; if (c->t2 != 0 && c->t2 != gsm->t2) need_restart = 1; if (c->encapsulation != gsm->encoding) need_restart = 1; if (c->adaption != gsm->adaption) need_restart = 1; /* Requires care */ if (c->initiator != gsm->initiator) need_close = 1; if (c->mru != gsm->mru) need_restart = 1; if (c->mtu != gsm->mtu) need_restart = 1; /* * Close down what is needed, restart and initiate the new * configuration. On the first time there is no DLCI[0] * and closing or cleaning up is not necessary. */ if (need_close || need_restart) gsm_cleanup_mux(gsm, true); gsm->initiator = c->initiator; gsm->mru = c->mru; gsm->mtu = c->mtu; gsm->encoding = c->encapsulation ? 
GSM_ADV_OPT : GSM_BASIC_OPT; gsm->adaption = c->adaption; gsm->n2 = c->n2; if (c->i == 1) gsm->ftype = UIH; else if (c->i == 2) gsm->ftype = UI; if (c->t1) gsm->t1 = c->t1; if (c->t2) gsm->t2 = c->t2; if (c->t3) gsm->t3 = c->t3; if (c->k) gsm->k = c->k; /* * FIXME: We need to separate activation/deactivation from adding * and removing from the mux array */ if (gsm->dead) { int ret = gsm_activate_mux(gsm); if (ret) return ret; if (gsm->initiator) gsm_dlci_begin_open(gsm->dlci[0]); } return 0; } static void gsm_copy_config_ext_values(struct gsm_mux *gsm, struct gsm_config_ext *ce) { memset(ce, 0, sizeof(*ce)); ce->wait_config = gsm->wait_config ? 1 : 0; ce->keep_alive = gsm->keep_alive; } static int gsm_config_ext(struct gsm_mux *gsm, struct gsm_config_ext *ce) { bool need_restart = false; unsigned int i; /* * Check that userspace doesn't put stuff in here to prevent breakages * in the future. */ for (i = 0; i < ARRAY_SIZE(ce->reserved); i++) if (ce->reserved[i]) return -EINVAL; if (ce->flags & ~GSM_FL_RESTART) return -EINVAL; /* Requires care */ if (ce->flags & GSM_FL_RESTART) need_restart = true; /* * Close down what is needed, restart and initiate the new * configuration. On the first time there is no DLCI[0] * and closing or cleaning up is not necessary. */ if (need_restart) gsm_cleanup_mux(gsm, true); /* * Setup the new configuration values */ gsm->wait_config = ce->wait_config ? true : false; gsm->keep_alive = ce->keep_alive; if (gsm->dead) { int ret = gsm_activate_mux(gsm); if (ret) return ret; if (gsm->initiator) gsm_dlci_begin_open(gsm->dlci[0]); } return 0; } /** * gsmld_output - write to link * @gsm: our mux * @data: bytes to output * @len: size * * Write a block of data from the GSM mux to the data channel. This * will eventually be serialized from above but at the moment isn't. */ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len) { if (tty_write_room(gsm->tty) < len) { set_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags); return -ENOSPC; } if (debug & DBG_DATA) gsm_hex_dump_bytes(__func__, data, len); return gsm->tty->ops->write(gsm->tty, data, len); } /** * gsmld_write_trigger - schedule ldisc write task * @gsm: our mux */ static void gsmld_write_trigger(struct gsm_mux *gsm) { if (!gsm || !gsm->dlci[0] || gsm->dlci[0]->dead) return; schedule_work(&gsm->tx_work); } /** * gsmld_write_task - ldisc write task * @work: our tx write work * * Writes out data to the ldisc if possible. We are doing this here to * avoid dead-locking. This returns if no space or data is left for output. */ static void gsmld_write_task(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); unsigned long flags; int i, ret; /* All outstanding control channel and control messages and one data * frame is sent. */ ret = -ENODEV; spin_lock_irqsave(&gsm->tx_lock, flags); if (gsm->tty) ret = gsm_data_kick(gsm); spin_unlock_irqrestore(&gsm->tx_lock, flags); if (ret >= 0) for (i = 0; i < NUM_DLCI; i++) if (gsm->dlci[i]) tty_port_tty_wakeup(&gsm->dlci[i]->port); } /** * gsmld_attach_gsm - mode set up * @tty: our tty structure * @gsm: our mux * * Set up the MUX for basic mode and commence connecting to the * modem. Currently called from the line discipline set up but * will need moving to an ioctl path. */ static void gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { gsm->tty = tty_kref_get(tty); /* Turn off tty XON/XOFF handling to handle it explicitly. 
*/ gsm->old_c_iflag = tty->termios.c_iflag; tty->termios.c_iflag &= (IXON | IXOFF); } /** * gsmld_detach_gsm - stop doing 0710 mux * @tty: tty attached to the mux * @gsm: mux * * Shutdown and then clean up the resources used by the line discipline */ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { WARN_ON(tty != gsm->tty); /* Restore tty XON/XOFF handling. */ gsm->tty->termios.c_iflag = gsm->old_c_iflag; tty_kref_put(gsm->tty); gsm->tty = NULL; } static void gsmld_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct gsm_mux *gsm = tty->disc_data; u8 flags = TTY_NORMAL; if (debug & DBG_DATA) gsm_hex_dump_bytes(__func__, cp, count); for (; count; count--, cp++) { if (fp) flags = *fp++; switch (flags) { case TTY_NORMAL: if (gsm->receive) gsm->receive(gsm, *cp); break; case TTY_OVERRUN: case TTY_BREAK: case TTY_PARITY: case TTY_FRAME: gsm_error(gsm); break; default: WARN_ONCE(1, "%s: unknown flag %d\n", tty_name(tty), flags); break; } } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ } /** * gsmld_flush_buffer - clean input queue * @tty: terminal device * * Flush the input buffer. Called when the line discipline is * being closed, when the tty layer wants the buffer flushed (eg * at hangup). */ static void gsmld_flush_buffer(struct tty_struct *tty) { } /** * gsmld_close - close the ldisc for this tty * @tty: device * * Called from the terminal layer when this line discipline is * being shut down, either because of a close or becsuse of a * discipline change. The function will not be called while other * ldisc methods are in progress. */ static void gsmld_close(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; /* The ldisc locks and closes the port before calling our close. This * means we have no way to do a proper disconnect. We will not bother * to do one. */ gsm_cleanup_mux(gsm, false); gsmld_detach_gsm(tty, gsm); gsmld_flush_buffer(tty); /* Do other clean up here */ mux_put(gsm); } /** * gsmld_open - open an ldisc * @tty: terminal to open * * Called when this line discipline is being attached to the * terminal device. Can sleep. Called serialized so that no * other events will occur in parallel. No further open will occur * until a close. */ static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EINVAL; /* Attach our ldisc data */ gsm = gsm_alloc_mux(); if (gsm == NULL) return -ENOMEM; tty->disc_data = gsm; tty->receive_room = 65536; /* Attach the initial passive connection */ gsmld_attach_gsm(tty, gsm); /* The mux will not be activated yet, we wait for correct * configuration first. */ if (gsm->encoding == GSM_BASIC_OPT) gsm->receive = gsm0_receive; else gsm->receive = gsm1_receive; return 0; } /** * gsmld_write_wakeup - asynchronous I/O notifier * @tty: tty device * * Required for the ptys, serial driver etc. since processes * that attach themselves to the master and rely on ASYNC * IO must be woken up */ static void gsmld_write_wakeup(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; /* Queue poll */ gsmld_write_trigger(gsm); } /** * gsmld_read - read function for tty * @tty: tty device * @file: file object * @buf: userspace buffer pointer * @nr: size of I/O * @cookie: unused * @offset: unused * * Perform reads for the line discipline. We are guaranteed that the * line discipline will not be closed under us but we may get multiple * parallel readers and must handle this ourselves. 
We may also get * a hangup. Always called in user context, may sleep. * * This code must be sure never to sleep through a hangup. */ static ssize_t gsmld_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t nr, void **cookie, unsigned long offset) { return -EOPNOTSUPP; } /** * gsmld_write - write function for tty * @tty: tty device * @file: file object * @buf: userspace buffer pointer * @nr: size of I/O * * Called when the owner of the device wants to send a frame * itself (or some other control data). The data is transferred * as-is and must be properly framed and checksummed as appropriate * by userspace. Frames are either sent whole or not at all as this * avoids pain user side. */ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; unsigned long flags; size_t space; int ret; if (!gsm) return -ENODEV; ret = -ENOBUFS; spin_lock_irqsave(&gsm->tx_lock, flags); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); spin_unlock_irqrestore(&gsm->tx_lock, flags); return ret; } /** * gsmld_poll - poll method for N_GSM0710 * @tty: terminal device * @file: file accessing it * @wait: poll table * * Called when the line discipline is asked to poll() for data or * for special events. This code is not serialized with respect to * other events save open/close. * * This code must be sure never to sleep through a hangup. * Called without the kernel lock held - fine */ static __poll_t gsmld_poll(struct tty_struct *tty, struct file *file, poll_table *wait) { __poll_t mask = 0; struct gsm_mux *gsm = tty->disc_data; poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); if (gsm->dead) mask |= EPOLLHUP; if (tty_hung_up_p(file)) mask |= EPOLLHUP; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= EPOLLHUP; if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static int gsmld_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct gsm_config c; struct gsm_config_ext ce; struct gsm_dlci_config dc; struct gsm_mux *gsm = tty->disc_data; unsigned int base, addr; struct gsm_dlci *dlci; switch (cmd) { case GSMIOC_GETCONF: gsm_copy_config_values(gsm, &c); if (copy_to_user((void __user *)arg, &c, sizeof(c))) return -EFAULT; return 0; case GSMIOC_SETCONF: if (copy_from_user(&c, (void __user *)arg, sizeof(c))) return -EFAULT; return gsm_config(gsm, &c); case GSMIOC_GETFIRST: base = mux_num_to_base(gsm); return put_user(base + 1, (__u32 __user *)arg); case GSMIOC_GETCONF_EXT: gsm_copy_config_ext_values(gsm, &ce); if (copy_to_user((void __user *)arg, &ce, sizeof(ce))) return -EFAULT; return 0; case GSMIOC_SETCONF_EXT: if (copy_from_user(&ce, (void __user *)arg, sizeof(ce))) return -EFAULT; return gsm_config_ext(gsm, &ce); case GSMIOC_GETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel == 0 || dc.channel >= NUM_DLCI) return -EINVAL; addr = array_index_nospec(dc.channel, NUM_DLCI); dlci = gsm->dlci[addr]; if (!dlci) { dlci = gsm_dlci_alloc(gsm, addr); if (!dlci) return -ENOMEM; } gsm_dlci_copy_config_values(dlci, &dc); if (copy_to_user((void __user *)arg, &dc, sizeof(dc))) return -EFAULT; return 0; case GSMIOC_SETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel == 0 || dc.channel >= NUM_DLCI) return -EINVAL; addr = array_index_nospec(dc.channel, NUM_DLCI); dlci 
= gsm->dlci[addr]; if (!dlci) { dlci = gsm_dlci_alloc(gsm, addr); if (!dlci) return -ENOMEM; } return gsm_dlci_config(dlci, &dc, 0); default: return n_tty_ioctl_helper(tty, cmd, arg); } } /* * Network interface * */ static int gsm_mux_net_open(struct net_device *net) { pr_debug("%s called\n", __func__); netif_start_queue(net); return 0; } static int gsm_mux_net_close(struct net_device *net) { netif_stop_queue(net); return 0; } static void dlci_net_free(struct gsm_dlci *dlci) { if (!dlci->net) { WARN_ON(1); return; } dlci->adaption = dlci->prev_adaption; dlci->data = dlci->prev_data; free_netdev(dlci->net); dlci->net = NULL; } static void net_free(struct kref *ref) { struct gsm_mux_net *mux_net; struct gsm_dlci *dlci; mux_net = container_of(ref, struct gsm_mux_net, ref); dlci = mux_net->dlci; if (dlci->net) { unregister_netdev(dlci->net); dlci_net_free(dlci); } } static inline void muxnet_get(struct gsm_mux_net *mux_net) { kref_get(&mux_net->ref); } static inline void muxnet_put(struct gsm_mux_net *mux_net) { kref_put(&mux_net->ref, net_free); } static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb, struct net_device *net) { struct gsm_mux_net *mux_net = netdev_priv(net); struct gsm_dlci *dlci = mux_net->dlci; muxnet_get(mux_net); skb_queue_head(&dlci->skb_list, skb); net->stats.tx_packets++; net->stats.tx_bytes += skb->len; gsm_dlci_data_kick(dlci); /* And tell the kernel when the last transmit started. */ netif_trans_update(net); muxnet_put(mux_net); return NETDEV_TX_OK; } /* called when a packet did not ack after watchdogtimeout */ static void gsm_mux_net_tx_timeout(struct net_device *net, unsigned int txqueue) { /* Tell syslog we are hosed. */ dev_dbg(&net->dev, "Tx timed out.\n"); /* Update statistics */ net->stats.tx_errors++; } static void gsm_mux_rx_netchar(struct gsm_dlci *dlci, const u8 *in_buf, int size) { struct net_device *net = dlci->net; struct sk_buff *skb; struct gsm_mux_net *mux_net = netdev_priv(net); muxnet_get(mux_net); /* Allocate an sk_buff */ skb = dev_alloc_skb(size + NET_IP_ALIGN); if (!skb) { /* We got no receive buffer. 
*/ net->stats.rx_dropped++; muxnet_put(mux_net); return; } skb_reserve(skb, NET_IP_ALIGN); skb_put_data(skb, in_buf, size); skb->dev = net; skb->protocol = htons(ETH_P_IP); /* Ship it off to the kernel */ netif_rx(skb); /* update out statistics */ net->stats.rx_packets++; net->stats.rx_bytes += size; muxnet_put(mux_net); return; } static void gsm_mux_net_init(struct net_device *net) { static const struct net_device_ops gsm_netdev_ops = { .ndo_open = gsm_mux_net_open, .ndo_stop = gsm_mux_net_close, .ndo_start_xmit = gsm_mux_net_start_xmit, .ndo_tx_timeout = gsm_mux_net_tx_timeout, }; net->netdev_ops = &gsm_netdev_ops; /* fill in the other fields */ net->watchdog_timeo = GSM_NET_TX_TIMEOUT; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; net->type = ARPHRD_NONE; net->tx_queue_len = 10; } /* caller holds the dlci mutex */ static void gsm_destroy_network(struct gsm_dlci *dlci) { struct gsm_mux_net *mux_net; pr_debug("destroy network interface\n"); if (!dlci->net) return; mux_net = netdev_priv(dlci->net); muxnet_put(mux_net); } /* caller holds the dlci mutex */ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) { char *netname; int retval = 0; struct net_device *net; struct gsm_mux_net *mux_net; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* Already in a non tty mode */ if (dlci->adaption > 2) return -EBUSY; if (nc->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; if (nc->adaption != 3 && nc->adaption != 4) return -EPROTONOSUPPORT; pr_debug("create network interface\n"); netname = "gsm%d"; if (nc->if_name[0] != '\0') netname = nc->if_name; net = alloc_netdev(sizeof(struct gsm_mux_net), netname, NET_NAME_UNKNOWN, gsm_mux_net_init); if (!net) { pr_err("alloc_netdev failed\n"); return -ENOMEM; } net->mtu = dlci->mtu; net->min_mtu = MIN_MTU; net->max_mtu = dlci->mtu; mux_net = netdev_priv(net); mux_net->dlci = dlci; kref_init(&mux_net->ref); strscpy(nc->if_name, net->name); /* return net name */ /* reconfigure dlci for network */ dlci->prev_adaption = dlci->adaption; dlci->prev_data = dlci->data; dlci->adaption = nc->adaption; dlci->data = gsm_mux_rx_netchar; dlci->net = net; pr_debug("register netdev\n"); retval = register_netdev(net); if (retval) { pr_err("network register fail %d\n", retval); dlci_net_free(dlci); return retval; } return net->ifindex; /* return network index */ } /* Line discipline for real tty */ static struct tty_ldisc_ops tty_ldisc_packet = { .owner = THIS_MODULE, .num = N_GSM0710, .name = "n_gsm", .open = gsmld_open, .close = gsmld_close, .flush_buffer = gsmld_flush_buffer, .read = gsmld_read, .write = gsmld_write, .ioctl = gsmld_ioctl, .poll = gsmld_poll, .receive_buf = gsmld_receive_buf, .write_wakeup = gsmld_write_wakeup }; /* * Virtual tty side */ /** * gsm_modem_upd_via_data - send modem bits via convergence layer * @dlci: channel * @brk: break signal * * Send an empty frame to signal mobile state changes and to transmit the * break signal for adaption 2. 
*/ static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { struct gsm_mux *gsm = dlci->gsm; unsigned long flags; if (dlci->state != DLCI_OPEN || dlci->adaption != 2) return; spin_lock_irqsave(&gsm->tx_lock, flags); gsm_dlci_modem_output(gsm, dlci, brk); spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** * gsm_modem_upd_via_msc - send modem bits via control frame * @dlci: channel * @brk: break signal */ static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk) { u8 modembits[3]; struct gsm_control *ctrl; int len = 2; if (dlci->gsm->encoding != GSM_BASIC_OPT) return 0; modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ if (!brk) { modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; } else { modembits[1] = gsm_encode_modem(dlci) << 1; modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ len++; } ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len); if (ctrl == NULL) return -ENOMEM; return gsm_control_wait(dlci->gsm, ctrl); } /** * gsm_modem_update - send modem status line state * @dlci: channel * @brk: break signal */ static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk) { if (dlci->gsm->dead) return -EL2HLT; if (dlci->adaption == 2) { /* Send convergence layer type 2 empty data frame. */ gsm_modem_upd_via_data(dlci, brk); return 0; } else if (dlci->gsm->encoding == GSM_BASIC_OPT) { /* Send as MSC control message. */ return gsm_modem_upd_via_msc(dlci, brk); } /* Modem status lines are not supported. */ return -EPROTONOSUPPORT; } /** * gsm_wait_modem_change - wait for modem status line change * @dlci: channel * @mask: modem status line bits * * The function returns if: * - any given modem status line bit changed * - the wait event function got interrupted (e.g. by a signal) * - the underlying DLCI was closed * - the underlying ldisc device was removed */ static int gsm_wait_modem_change(struct gsm_dlci *dlci, u32 mask) { struct gsm_mux *gsm = dlci->gsm; u32 old = dlci->modem_rx; int ret; ret = wait_event_interruptible(gsm->event, gsm->dead || dlci->state != DLCI_OPEN || (old ^ dlci->modem_rx) & mask); if (gsm->dead) return -ENODEV; if (dlci->state != DLCI_OPEN) return -EL2NSYNC; return ret; } static bool gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); struct gsm_mux *gsm = dlci->gsm; /* Not yet open so no carrier info */ if (dlci->state != DLCI_OPEN) return false; if (debug & DBG_CD_ON) return true; /* * Basic mode with control channel in ADM mode may not respond * to CMD_MSC at all and modem_rx is empty. 
*/ if (gsm->encoding == GSM_BASIC_OPT && gsm->dlci[0]->mode == DLCI_MODE_ADM && !dlci->modem_rx) return true; return dlci->modem_rx & TIOCM_CD; } static void gsm_dtr_rts(struct tty_port *port, bool active) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); unsigned int modem_tx = dlci->modem_tx; if (active) modem_tx |= TIOCM_DTR | TIOCM_RTS; else modem_tx &= ~(TIOCM_DTR | TIOCM_RTS); if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; gsm_modem_update(dlci, 0); } } static const struct tty_port_operations gsm_port_ops = { .carrier_raised = gsm_carrier_raised, .dtr_rts = gsm_dtr_rts, .destruct = gsm_dlci_free, }; static int gsmtty_install(struct tty_driver *driver, struct tty_struct *tty) { struct gsm_mux *gsm; struct gsm_dlci *dlci; unsigned int line = tty->index; unsigned int mux = mux_line_to_num(line); bool alloc = false; int ret; line = line & 0x3F; if (mux >= MAX_MUX) return -ENXIO; /* FIXME: we need to lock gsm_mux for lifetimes of ttys eventually */ if (gsm_mux[mux] == NULL) return -EUNATCH; if (line == 0 || line > 61) /* 62/63 reserved */ return -ECHRNG; gsm = gsm_mux[mux]; if (gsm->dead) return -EL2HLT; /* If DLCI 0 is not yet fully open return an error. This is ok from a locking perspective as we don't have to worry about this if DLCI0 is lost */ mutex_lock(&gsm->mutex); if (gsm->dlci[0] && gsm->dlci[0]->state != DLCI_OPEN) { mutex_unlock(&gsm->mutex); return -EL2NSYNC; } dlci = gsm->dlci[line]; if (dlci == NULL) { alloc = true; dlci = gsm_dlci_alloc(gsm, line); } if (dlci == NULL) { mutex_unlock(&gsm->mutex); return -ENOMEM; } ret = tty_port_install(&dlci->port, driver, tty); if (ret) { if (alloc) dlci_put(dlci); mutex_unlock(&gsm->mutex); return ret; } dlci_get(dlci); dlci_get(gsm->dlci[0]); mux_get(gsm); tty->driver_data = dlci; mutex_unlock(&gsm->mutex); return 0; } static int gsmtty_open(struct tty_struct *tty, struct file *filp) { struct gsm_dlci *dlci = tty->driver_data; struct tty_port *port = &dlci->port; port->count++; tty_port_tty_set(port, tty); dlci->modem_rx = 0; /* We could in theory open and close before we wait - eg if we get a DM straight back. 
This is ok as that will have caused a hangup */ tty_port_set_initialized(port, true); /* Start sending off SABM messages */ if (!dlci->gsm->wait_config) { /* Start sending off SABM messages */ if (dlci->gsm->initiator) gsm_dlci_begin_open(dlci); else gsm_dlci_set_opening(dlci); } else { gsm_dlci_set_wait_config(dlci); } /* And wait for virtual carrier */ return tty_port_block_til_ready(port, tty, filp); } static void gsmtty_close(struct tty_struct *tty, struct file *filp) { struct gsm_dlci *dlci = tty->driver_data; if (dlci == NULL) return; if (dlci->state == DLCI_CLOSED) return; mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); if (tty_port_close_start(&dlci->port, tty, filp) == 0) return; gsm_dlci_begin_close(dlci); if (tty_port_initialized(&dlci->port) && C_HUPCL(tty)) tty_port_lower_dtr_rts(&dlci->port); tty_port_close_end(&dlci->port, tty); tty_port_tty_set(&dlci->port, NULL); return; } static void gsmtty_hangup(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; tty_port_hangup(&dlci->port); gsm_dlci_begin_close(dlci); } static ssize_t gsmtty_write(struct tty_struct *tty, const u8 *buf, size_t len) { int sent; struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return -EINVAL; /* Stuff the bytes into the fifo queue */ sent = kfifo_in_locked(&dlci->fifo, buf, len, &dlci->lock); /* Need to kick the channel */ gsm_dlci_data_kick(dlci); return sent; } static unsigned int gsmtty_write_room(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; return kfifo_avail(&dlci->fifo); } static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; return kfifo_len(&dlci->fifo); } static void gsmtty_flush_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; unsigned long flags; if (dlci->state == DLCI_CLOSED) return; /* Caution needed: If we implement reliable transport classes then the data being transmitted can't simply be junked once it has first hit the stack. Until then we can just blow it away */ spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); spin_unlock_irqrestore(&dlci->lock, flags); /* Need to unhook this DLCI from the transmit queue logic */ } static void gsmtty_wait_until_sent(struct tty_struct *tty, int timeout) { /* The FIFO handles the queue so the kernel will do the right thing waiting on chars_in_buffer before calling us. 
No work to do here */ } static int gsmtty_tiocmget(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return -EINVAL; return dlci->modem_rx; } static int gsmtty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct gsm_dlci *dlci = tty->driver_data; unsigned int modem_tx = dlci->modem_tx; if (dlci->state == DLCI_CLOSED) return -EINVAL; modem_tx &= ~clear; modem_tx |= set; if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; return gsm_modem_update(dlci, 0); } return 0; } static int gsmtty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct gsm_dlci *dlci = tty->driver_data; struct gsm_netconfig nc; struct gsm_dlci_config dc; int index; if (dlci->state == DLCI_CLOSED) return -EINVAL; switch (cmd) { case GSMIOC_ENABLE_NET: if (copy_from_user(&nc, (void __user *)arg, sizeof(nc))) return -EFAULT; nc.if_name[IFNAMSIZ-1] = '\0'; /* return net interface index or error code */ mutex_lock(&dlci->mutex); index = gsm_create_network(dlci, &nc); mutex_unlock(&dlci->mutex); if (copy_to_user((void __user *)arg, &nc, sizeof(nc))) return -EFAULT; return index; case GSMIOC_DISABLE_NET: if (!capable(CAP_NET_ADMIN)) return -EPERM; mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); return 0; case GSMIOC_GETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel != dlci->addr) return -EPERM; gsm_dlci_copy_config_values(dlci, &dc); if (copy_to_user((void __user *)arg, &dc, sizeof(dc))) return -EFAULT; return 0; case GSMIOC_SETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel >= NUM_DLCI) return -EINVAL; if (dc.channel != 0 && dc.channel != dlci->addr) return -EPERM; return gsm_dlci_config(dlci, &dc, 1); case TIOCMIWAIT: return gsm_wait_modem_change(dlci, (u32)arg); default: return -ENOIOCTLCMD; } } static void gsmtty_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; /* For the moment its fixed. In actual fact the speed information for the virtual channel can be propogated in both directions by the RPN control message. This however rapidly gets nasty as we then have to remap modem signals each way according to whether our virtual cable is null modem etc .. 
*/ tty_termios_copy_hw(&tty->termios, old); } static void gsmtty_throttle(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; /* Send an MSC with RTS cleared */ gsm_modem_update(dlci, 0); } static void gsmtty_unthrottle(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; /* Send an MSC with RTS set */ gsm_modem_update(dlci, 0); } static int gsmtty_break_ctl(struct tty_struct *tty, int state) { struct gsm_dlci *dlci = tty->driver_data; int encode = 0; /* Off */ if (dlci->state == DLCI_CLOSED) return -EINVAL; if (state == -1) /* "On indefinitely" - we can't encode this properly */ encode = 0x0F; else if (state > 0) { encode = state / 200; /* mS to encoding */ if (encode > 0x0F) encode = 0x0F; /* Best effort */ } return gsm_modem_update(dlci, encode); } static void gsmtty_cleanup(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; struct gsm_mux *gsm = dlci->gsm; dlci_put(dlci); dlci_put(gsm->dlci[0]); mux_put(gsm); } /* Virtual ttys for the demux */ static const struct tty_operations gsmtty_ops = { .install = gsmtty_install, .open = gsmtty_open, .close = gsmtty_close, .write = gsmtty_write, .write_room = gsmtty_write_room, .chars_in_buffer = gsmtty_chars_in_buffer, .flush_buffer = gsmtty_flush_buffer, .ioctl = gsmtty_ioctl, .throttle = gsmtty_throttle, .unthrottle = gsmtty_unthrottle, .set_termios = gsmtty_set_termios, .hangup = gsmtty_hangup, .wait_until_sent = gsmtty_wait_until_sent, .tiocmget = gsmtty_tiocmget, .tiocmset = gsmtty_tiocmset, .break_ctl = gsmtty_break_ctl, .cleanup = gsmtty_cleanup, }; static int __init gsm_init(void) { /* Fill in our line protocol discipline, and register it */ int status = tty_register_ldisc(&tty_ldisc_packet); if (status != 0) { pr_err("n_gsm: can't register line discipline (err = %d)\n", status); return status; } gsm_tty_driver = tty_alloc_driver(GSM_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_HARDWARE_BREAK); if (IS_ERR(gsm_tty_driver)) { pr_err("gsm_init: tty allocation failed.\n"); status = PTR_ERR(gsm_tty_driver); goto err_unreg_ldisc; } gsm_tty_driver->driver_name = "gsmtty"; gsm_tty_driver->name = "gsmtty"; gsm_tty_driver->major = 0; /* Dynamic */ gsm_tty_driver->minor_start = 0; gsm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; gsm_tty_driver->subtype = SERIAL_TYPE_NORMAL; gsm_tty_driver->init_termios = tty_std_termios; /* Fixme */ gsm_tty_driver->init_termios.c_lflag &= ~ECHO; tty_set_operations(gsm_tty_driver, &gsmtty_ops); if (tty_register_driver(gsm_tty_driver)) { pr_err("gsm_init: tty registration failed.\n"); status = -EBUSY; goto err_put_driver; } pr_debug("gsm_init: loaded as %d,%d.\n", gsm_tty_driver->major, gsm_tty_driver->minor_start); return 0; err_put_driver: tty_driver_kref_put(gsm_tty_driver); err_unreg_ldisc: tty_unregister_ldisc(&tty_ldisc_packet); return status; } static void __exit gsm_exit(void) { tty_unregister_ldisc(&tty_ldisc_packet); tty_unregister_driver(gsm_tty_driver); tty_driver_kref_put(gsm_tty_driver); } module_init(gsm_init); module_exit(gsm_exit); MODULE_DESCRIPTION("GSM 0710 tty multiplexor"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_GSM0710);
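For orientation, a hedged userspace sketch of how this line discipline is normally driven, based only on the GSMIOC_* ioctls and struct gsm_config fields handled in gsmld_ioctl()/gsm_config() above. The device path, the requirement that the modem is already in CMUX mode, and the parameter values are illustrative assumptions, not part of the driver.

/* Hypothetical userspace sketch: attach N_GSM0710 to a modem tty and
 * adjust the mux configuration via GSMIOC_GETCONF/GSMIOC_SETCONF.
 * "/dev/ttyUSB0" and the chosen values are assumptions for illustration. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/gsmmux.h>
#include <linux/tty.h>

int attach_gsm_mux(void)
{
	struct gsm_config c;
	int ldisc = N_GSM0710;
	int fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);	/* assumed modem port */

	if (fd < 0)
		return -1;
	/* The modem itself must already be in multiplexer mode (e.g. AT+CMUX). */
	if (ioctl(fd, TIOCSETD, &ldisc) < 0)		/* gsmld_open() runs here */
		goto fail;
	if (ioctl(fd, GSMIOC_GETCONF, &c) < 0)		/* gsm_copy_config_values() */
		goto fail;
	c.initiator = 1;				/* we bring up DLCI 0 */
	c.encapsulation = 0;				/* basic option framing */
	c.mru = 127;					/* example values only */
	c.mtu = 127;
	if (ioctl(fd, GSMIOC_SETCONF, &c) < 0)		/* gsm_config() runs here */
		goto fail;
	/* The virtual /dev/gsmtty* nodes for this mux can now be opened. */
	return fd;
fail:
	close(fd);
	return -1;
}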
// SPDX-License-Identifier: GPL-2.0-only /* * TCP Vegas congestion control * * This is based on the congestion detection/avoidance scheme described in * Lawrence S. Brakmo and Larry L. Peterson. * "TCP Vegas: End to end congestion avoidance on a global internet." * IEEE Journal on Selected Areas in Communication, 13(8):1465--1480, * October 1995. Available from: * ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps * * See http://www.cs.arizona.edu/xkernel/ for their implementation. * The main aspects that distinguish this implementation from the * Arizona Vegas implementation are: * o We do not change the loss detection or recovery mechanisms of * Linux in any way. Linux already recovers from losses quite well, * using fine-grained timers, NewReno, and FACK. * o To avoid the performance penalty imposed by increasing cwnd * only every-other RTT during slow start, we increase during * every RTT during slow start, just like Reno. * o Largely to allow continuous cwnd growth during slow start, * we use the rate at which ACKs come back as the "actual" * rate, rather than the rate at which data is sent. * o To speed convergence to the right rate, we set the cwnd * to achieve the right ("actual") rate when we exit slow start. * o To filter out the noise caused by delayed ACKs, we use the * minimum RTT sample observed during the last RTT to calculate * the actual rate. * o When the sender re-starts from idle, it waits until it has * received ACKs for an entire flight of new data before making * a cwnd adjustment decision. The original Vegas implementation * assumed senders never went idle. 
*/ #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/inet_diag.h> #include <net/tcp.h> #include "tcp_vegas.h" static int alpha = 2; static int beta = 4; static int gamma = 1; module_param(alpha, int, 0644); MODULE_PARM_DESC(alpha, "lower bound of packets in network"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "upper bound of packets in network"); module_param(gamma, int, 0644); MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)"); /* There are several situations when we must "re-start" Vegas: * * o when a connection is established * o after an RTO * o after fast recovery * o when we send a packet and there is no outstanding * unacknowledged data (restarting an idle connection) * * In these circumstances we cannot do a Vegas calculation at the * end of the first RTT, because any calculation we do is using * stale info -- both the saved cwnd and congestion feedback are * stale. * * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ static void vegas_enable(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); /* Begin taking Vegas samples next time we send something. */ vegas->doing_vegas_now = 1; /* Set the beginning of the next send window. */ vegas->beg_snd_nxt = tp->snd_nxt; vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } /* Stop taking Vegas samples for now. */ static inline void vegas_disable(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->doing_vegas_now = 0; } void tcp_vegas_init(struct sock *sk) { struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; vegas_enable(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_init); /* Do RTT sampling needed for Vegas. * Basically we: * o min-filter RTT samples from within an RTT to get the current * propagation delay + queuing delay (we are min-filtering to try to * avoid the effects of delayed ACKs) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct vegas *vegas = inet_csk_ca(sk); u32 vrtt; if (sample->rtt_us < 0) return; /* Never allow zero rtt or baseRTT */ vrtt = sample->rtt_us + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) vegas->baseRTT = vrtt; /* Find the min RTT during the last RTT to find * the current prop. delay + queuing delay: */ vegas->minRTT = min(vegas->minRTT, vrtt); vegas->cntRTT++; } EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked); void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) vegas_enable(sk); else vegas_disable(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_state); /* * If the connection is idle and we are restarting, * then we don't want to do any Vegas calculations * until we get fresh RTT samples. So when we * restart, we reset our Vegas state to a clean * slate. After we get acks for this flight of * packets, _then_ we can make Vegas calculations * again. 
*/ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) tcp_vegas_init(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) { tcp_reno_cong_avoid(sk, ack, acked); return; } if (after(ack, vegas->beg_snd_nxt)) { /* Do the Vegas once-per-RTT cwnd adjustment. */ /* Save the extent of the current window so we can use this * at the end of the next RTT. */ vegas->beg_snd_nxt = tp->snd_nxt; /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got * at least one RTT sample that wasn't from a delayed ACK. * If we only had 2 samples total, * then that means we're getting only 1 ACK per RTT, which * means they're almost certainly delayed ACKs. * If we have 3 samples, we should be OK. */ if (vegas->cntRTT <= 2) { /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. */ tcp_reno_cong_avoid(sk, ack, acked); } else { u32 rtt, diff; u64 target_cwnd; /* We have enough RTT samples, so, using the Vegas * algorithm, we determine if we should increase or * decrease cwnd, and by how much. */ /* Pluck out the RTT we are using for the Vegas * calculations. This is the min RTT seen during the * last RTT. Taking the min filters out the effects * of delayed ACKs, at the cost of noticing congestion * a bit later. */ rtt = vegas->minRTT; /* Calculate the cwnd we should have, if we weren't * going too fast. * * This is: * (actual rate in segments) * baseRTT */ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT; do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity * is the "Diff" from the Arizona Vegas papers. */ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT; if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ /* Set cwnd to match the actual rate * exactly: * cwnd = (actual rate) * baseRTT * Then we add 1 because the integer * truncation robs us of full link * utilization. */ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), (u32)target_cwnd + 1)); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { /* Congestion avoidance. */ /* Figure out where we would like cwnd * to be. */ if (diff > beta) { /* The old window was too fast, so * we slow down. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. */ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); } else { /* Sending just as fast as we * should be. */ } } if (tcp_snd_cwnd(tp) < 2) tcp_snd_cwnd_set(tp, 2); else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp) tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp); tp->snd_ssthresh = tcp_current_ssthresh(sk); } /* Wipe the slate clean for the next RTT. */ vegas->cntRTT = 0; vegas->minRTT = 0x7fffffff; } /* Use normal slow start */ else if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); } /* Extract info for Tcp socket info provided via netlink. 
*/ size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { info->vegas.tcpv_enabled = ca->doing_vegas_now; info->vegas.tcpv_rttcnt = ca->cntRTT; info->vegas.tcpv_rtt = ca->baseRTT; info->vegas.tcpv_minrtt = ca->minRTT; *attr = INET_DIAG_VEGASINFO; return sizeof(struct tcpvegas_info); } return 0; } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); static struct tcp_congestion_ops tcp_vegas __read_mostly = { .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_vegas_cong_avoid, .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, .owner = THIS_MODULE, .name = "vegas", }; static int __init tcp_vegas_register(void) { BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } static void __exit tcp_vegas_unregister(void) { tcp_unregister_congestion_control(&tcp_vegas); } module_init(tcp_vegas_register); module_exit(tcp_vegas_unregister); MODULE_AUTHOR("Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("TCP Vegas");
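Since the module registers itself under the name "vegas" in tcp_vegas_register() above, userspace can opt a single socket into it with the standard TCP_CONGESTION socket option. A minimal sketch, assuming the tcp_vegas module is available on the running kernel:

/* Minimal sketch: request TCP Vegas for one socket via TCP_CONGESTION. */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>
#include <sys/socket.h>

int use_vegas(int sock)
{
	const char name[] = "vegas";	/* name registered by tcp_vegas above */

	return setsockopt(sock, IPPROTO_TCP, TCP_CONGESTION, name, strlen(name));
}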
/* SPDX-License-Identifier: GPL-2.0 */ /* Interface for implementing AF_XDP zero-copy support in drivers. * Copyright(c) 2020 Intel Corporation. */ #ifndef _LINUX_XDP_SOCK_DRV_H #define _LINUX_XDP_SOCK_DRV_H #include <net/xdp_sock.h> #include <net/xsk_buff_pool.h> #define XDP_UMEM_MIN_CHUNK_SHIFT 11 #define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT) struct xsk_cb_desc { void *src; u8 off; u8 bytes; }; #ifdef CONFIG_XDP_SOCKETS void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); void xsk_tx_release(struct xsk_buff_pool *pool); struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id); void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool); void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool); void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool); void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool); bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool); static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) { return XDP_PACKET_HEADROOM + pool->headroom; } static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return pool->chunk_size; } static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { xp_set_rxq_info(pool, rxq); } static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc) { xp_fill_cb(pool, desc); } static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { #ifdef CONFIG_NET_RX_BUSY_POLL return pool->heads[0].xdp.rxq->napi_id; #else return 0; #endif } static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { xp_dma_unmap(pool, attrs); } static inline int 
xsk_pool_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs) { struct xdp_umem *umem = pool->umem; return xp_dma_map(pool, dev, attrs, umem->pgs, umem->npgs); } static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); return xp_get_dma(xskb); } static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); return xp_get_frame_dma(xskb); } static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) { return xp_alloc(pool); } static inline bool xsk_is_eop_desc(struct xdp_desc *desc) { return !xp_mb_desc(desc); } /* Returns as many entries as possible up to max. 0 <= N <= max. */ static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { return xp_alloc_batch(pool, xdp, max); } static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return xp_can_alloc(pool, count); } static inline void xsk_buff_free(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); struct list_head *xskb_list = &xskb->pool->xskb_list; struct xdp_buff_xsk *pos, *tmp; if (likely(!xdp_buff_has_frags(xdp))) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) { list_del(&pos->xskb_list_node); xp_free(pos); } xdp_get_shared_info_from_buff(xdp)->nr_frags = 0; out: xp_free(xskb); } static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list); } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) { struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); struct xdp_buff *ret = NULL; struct xdp_buff_xsk *frag; frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, xskb_list_node); if (frag) { list_del(&frag->xskb_list_node); ret = &frag->xdp; } return ret; } static inline void xsk_buff_del_tail(struct xdp_buff *tail) { struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp); list_del(&xskb->xskb_list_node); } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) { struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); struct xdp_buff_xsk *frag; frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk, xskb_list_node); return &frag->xdp; } static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; xdp->data_meta = xdp->data; xdp->data_end = xdp->data + size; xdp->flags = 0; } static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { return xp_raw_get_dma(pool, addr); } static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) { return xp_raw_get_data(pool, addr); } #define XDP_TXMD_FLAGS_VALID ( \ XDP_TXMD_FLAGS_TIMESTAMP | \ XDP_TXMD_FLAGS_CHECKSUM | \ 0) static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) { return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); } static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) { struct xsk_tx_metadata *meta; if (!pool->tx_metadata_len) return NULL; meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len; if (unlikely(!xsk_buff_valid_tx_metadata(meta))) return NULL; /* no way to signal the error to the user */ return meta; } 
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); xp_dma_sync_for_cpu(xskb); } static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { xp_dma_sync_for_device(pool, dma, size); } #else static inline void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { } static inline bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { return false; } static inline u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max) { return 0; } static inline void xsk_tx_release(struct xsk_buff_pool *pool) { } static inline struct xsk_buff_pool * xsk_get_pool_from_qid(struct net_device *dev, u16 queue_id) { return NULL; } static inline void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { } static inline void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool) { } static inline void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool) { } static inline void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool) { } static inline bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool) { return false; } static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool) { return 0; } static inline u32 xsk_pool_get_chunk_size(struct xsk_buff_pool *pool) { return 0; } static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) { return 0; } static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { } static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc) { } static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool) { return 0; } static inline void xsk_pool_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { } static inline int xsk_pool_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs) { return 0; } static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp) { return 0; } static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp) { return 0; } static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) { return NULL; } static inline bool xsk_is_eop_desc(struct xdp_desc *desc) { return false; } static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { return 0; } static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return false; } static inline void xsk_buff_free(struct xdp_buff *xdp) { } static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { } static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) { return NULL; } static inline void xsk_buff_del_tail(struct xdp_buff *tail) { } static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first) { return NULL; } static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { return 0; } static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) { return NULL; } static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) { return false; } static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) { return NULL; } static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp) { } static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma, size_t size) { } #endif /* CONFIG_XDP_SOCKETS */ #endif /* 
_LINUX_XDP_SOCK_DRV_H */
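A hedged sketch, not taken from any real driver, of how the xsk_buff_* helpers declared above are typically combined on a zero-copy RX path: one function refills the hardware ring from the pool, the other finalizes a completed frame before it is handed to the XDP program. hw_post_rx_desc() is a hypothetical stand-in for programming a hardware descriptor.

/* Hedged illustration of AF_XDP zero-copy RX using the helpers above. */
#include <net/xdp_sock_drv.h>

void hw_post_rx_desc(void *hw_ring, dma_addr_t dma);	/* hypothetical HW helper */

static int example_zc_rx_refill(struct xsk_buff_pool *pool, void *hw_ring)
{
	/* Pull one frame from the fill queue and give its DMA address to the NIC. */
	struct xdp_buff *xdp = xsk_buff_alloc(pool);

	if (!xdp)
		return -ENOMEM;
	hw_post_rx_desc(hw_ring, xsk_buff_xdp_get_dma(xdp));
	return 0;
}

static void example_zc_rx_complete(struct xdp_buff *xdp, u32 len)
{
	/* Bound the buffer to what the device wrote and make it visible to the
	 * CPU before the XDP program inspects it. */
	xsk_buff_set_size(xdp, len);
	xsk_buff_dma_sync_for_cpu(xdp);
	/* ... the XDP program would run here; on XDP_DROP the frame goes back
	 * to the pool: */
	xsk_buff_free(xdp);
}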
#ifndef _NF_TPROXY_H_ #define _NF_TPROXY_H_ #include <net/tcp.h> enum nf_tproxy_lookup_t { NF_TPROXY_LOOKUP_LISTENER, NF_TPROXY_LOOKUP_ESTABLISHED, }; static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) { if (inet_sk_transparent(sk)) return true; sock_gen_put(sk); return false; } static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) { local_bh_disable(); inet_twsk_deschedule_put(tw); local_bh_enable(); } /* assign a socket to the skb -- consumes sk */ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { skb_orphan(skb); skb->sk = sk; skb->destructor = sock_edemux; } __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); /** * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections * @skb: The skb being processed. * @laddr: IPv4 address to redirect to or zero. * @lport: TCP port to redirect to or zero. * @sk: The TIME_WAIT TCP socket found by the lookup. * * We have to handle SYN packets arriving to TIME_WAIT sockets * differently: instead of reopening the connection we should rather * redirect the new connection to the proxy if there's a listener * socket present. * * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. * * Returns the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, struct sock *sk); /* * This is used when the user wants to intercept a connection matching * an explicit iptables rule. In this case the sockets are assumed * matching in preference order: * * - match: if there's a fully established connection matching the * _packet_ tuple, it is returned, assuming the redirection * already took place and we process a packet belonging to an * established connection * * - match: if there's a listening socket matching the redirection * (e.g. on-port & on-ip of the connection), it is returned, * regardless if it was bound to 0.0.0.0 or an explicit * address. The reasoning is that if there's an explicit rule, it * does not really matter if the listener is bound to an interface * or to 0. The user already stated that he wants redirection * (since he added the rule). * * Please note that there's an overlap between what a TPROXY target * and a socket match will match. Normally if you have both rules the * "socket" match will be the first one, effectively all packets * belonging to established connections going through that one. */ struct sock * nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type); const struct in6_addr * nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, const struct in6_addr *daddr); /** * nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections * @skb: The skb being processed. * @tproto: Transport protocol. 
* @thoff: Transport protocol header offset. * @net: Network namespace. * @laddr: IPv6 address to redirect to. * @lport: TCP port to redirect to or zero. * @sk: The TIME_WAIT TCP socket found by the lookup. * * We have to handle SYN packets arriving to TIME_WAIT sockets * differently: instead of reopening the connection we should rather * redirect the new connection to the proxy if there's a listener * socket present. * * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. * * Returns the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, struct net *net, const struct in6_addr *laddr, const __be16 lport, struct sock *sk); struct sock * nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type); #endif /* _NF_TPROXY_H_ */
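To make the intended call order concrete, here is a hedged sketch of how a TPROXY-style hook might combine the helpers above for IPv4 TCP: prefer an established socket matching the packet tuple, divert SYNs that hit a TIME_WAIT socket toward a listener, then attach the chosen socket to the skb. The surrounding hook, header parsing, and the redirect address/port handling are assumptions for illustration, not the actual xt_TPROXY implementation.

/* Hedged sketch of IPv4 TCP interception built on the declarations above. */
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/netfilter/nf_tproxy.h>

static bool example_tproxy_tcp_v4(struct net *net, struct sk_buff *skb,
				  const struct iphdr *iph, const struct tcphdr *th,
				  __be32 laddr, __be16 lport)
{
	struct sock *sk;

	/* Prefer a connection that already matches the packet tuple. */
	sk = nf_tproxy_get_sock_v4(net, skb, IPPROTO_TCP,
				   iph->saddr, iph->daddr,
				   th->source, th->dest,
				   skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
	if (sk && sk->sk_state == TCP_TIME_WAIT)
		/* A SYN against TIME_WAIT may be re-targeted at a listener. */
		sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
	else if (!sk)
		sk = nf_tproxy_get_sock_v4(net, skb, IPPROTO_TCP,
					   iph->saddr, laddr ? laddr : iph->daddr,
					   th->source, lport ? lport : th->dest,
					   skb->dev, NF_TPROXY_LOOKUP_LISTENER);
	if (!sk || !nf_tproxy_sk_is_transparent(sk))
		return false;
	nf_tproxy_assign_sock(skb, sk);	/* consumes the socket reference */
	return true;
}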
1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 
2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 
3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 
3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 
4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 
5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 
5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Z-Star/Vimicro zc301/zc302p/vc30x driver
 *
 * Copyright (C) 2009-2012 Jean-Francois Moine <http://moinejf.free.fr>
 * Copyright (C) 2004 2005 2006 Michel Xhaard mxhaard@magic.fr
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/input.h>
#include "gspca.h"
#include "jpeg.h"

MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>, Serge A. Suchkov <Serge.A.S@tochka.ru>");
MODULE_DESCRIPTION("GSPCA ZC03xx/VC3xx USB Camera Driver");
MODULE_LICENSE("GPL");

static int force_sensor = -1;

#define REG08_DEF 3		/* default JPEG compression (75%) */

#include "zc3xx-reg.h"

/* specific webcam descriptor */
struct sd {
	struct gspca_dev gspca_dev;	/* !! must be the first item */
	struct {	/* gamma/brightness/contrast control cluster */
		struct v4l2_ctrl *gamma;
		struct v4l2_ctrl *brightness;
		struct v4l2_ctrl *contrast;
	};
	struct {	/* autogain/exposure control cluster */
		struct v4l2_ctrl *autogain;
		struct v4l2_ctrl *exposure;
	};
	struct v4l2_ctrl *plfreq;
	struct v4l2_ctrl *sharpness;
	struct v4l2_ctrl *jpegqual;

	struct work_struct work;

	u8 reg08;		/* webcam compression quality */
	u8 bridge;
	u8 sensor;		/* Type of image sensor chip */
	u16 chip_revision;

	u8 jpeg_hdr[JPEG_HDR_SZ];
};

enum bridges {
	BRIDGE_ZC301,
	BRIDGE_ZC303,
};

enum sensors {
	SENSOR_ADCM2700,
	SENSOR_CS2102,
	SENSOR_CS2102K,
	SENSOR_GC0303,
	SENSOR_GC0305,
	SENSOR_HDCS2020,
	SENSOR_HV7131B,
	SENSOR_HV7131R,
	SENSOR_ICM105A,
	SENSOR_MC501CB,
	SENSOR_MT9V111_1,	/* (mi360soc) zc301 */
	SENSOR_MT9V111_3,	/* (mi360soc) zc303 */
	SENSOR_OV7620,		/* OV7648 - same values */
	SENSOR_OV7630C,
	SENSOR_PAS106,
	SENSOR_PAS202B,
	SENSOR_PB0330,
	SENSOR_PO2030,
	SENSOR_TAS5130C,
	SENSOR_MAX
};

static const struct v4l2_pix_format vga_mode[] = {
	{320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 240 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 1},
	{640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 480 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 0},
};

static const struct v4l2_pix_format broken_vga_mode[] = {
	{320, 232, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 320,
		.sizeimage = 320 * 232 * 4 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 1},
	{640, 472, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 640,
		.sizeimage = 640 * 472 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 0},
};

static const struct v4l2_pix_format sif_mode[] = {
	{176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 176,
		.sizeimage = 176 * 144 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 1},
	{352, 288, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE,
		.bytesperline = 352,
		.sizeimage = 352 * 288 * 3 / 8 + 590,
		.colorspace = V4L2_COLORSPACE_JPEG,
		.priv = 0},
};

/*
 * Bridge reg08 bits 1-2 -> JPEG quality conversion table. Note the highest
 * quality setting is not usable as USB 1 does not have enough bandwidth.
*/ static u8 jpeg_qual[] = {50, 75, 87, /* 94 */}; /* usb exchanges */ struct usb_action { u8 req; u8 val; u16 idx; }; static const struct usb_action adcm2700_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x04, ZC3XX_R002_CLOCKSELECT}, /* 00,02,04,cc */ {0xa0, 0x00, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xa0, 0xd3, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,d3,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd8, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d8,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xde, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,de,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xbb, 0x00, 0x0400}, /* 04,00,00,bb */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x0f, 0x140f}, /* 14,0f,0f,bb */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,37,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x58, ZC3XX_R116_RGAIN}, /* 01,16,58,cc */ {0xa0, 0x5a, ZC3XX_R118_BGAIN}, /* 01,18,5a,cc */ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xa0, 0xd3, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,d3,cc */ {0xbb, 0x00, 0x0408}, /* 04,00,08,bb */ {0xdd, 0x00, 0x0200}, /* 00,02,00,dd */ {0xbb, 0x00, 0x0400}, /* 04,00,00,bb */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x0f, 0x140f}, /* 14,0f,0f,bb */ {0xbb, 0xe0, 0x0c2e}, /* 0c,e0,2e,bb */ {0xbb, 0x01, 0x2000}, /* 20,01,00,bb */ {0xbb, 0x96, 0x2400}, /* 24,96,00,bb */ {0xbb, 0x06, 0x1006}, /* 10,06,06,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x5f, 0x2090}, /* 20,5f,90,bb */ {0xbb, 0x01, 0x8000}, /* 80,01,00,bb */ {0xbb, 0x09, 0x8400}, /* 84,09,00,bb */ {0xbb, 0x86, 0x0002}, /* 00,86,02,bb */ {0xbb, 0xe6, 0x0401}, /* 04,e6,01,bb */ {0xbb, 0x86, 0x0802}, /* 08,86,02,bb */ {0xbb, 0xe6, 0x0c01}, /* 0c,e6,01,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0000}, /* 00,fe,00,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0020}, /* 00,fe,20,aa */ /*mswin+*/ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, {0xaa, 0xfe, 0x0002}, {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xaa, 0xb4, 0xcd37}, {0xaa, 0xa4, 0x0004}, {0xaa, 0xa8, 0x0007}, {0xaa, 0xac, 0x0004}, /*mswin-*/ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xa0, 0x01, 
ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0000}, /* 00,fe,00,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x04, 0x0400}, /* 04,04,00,bb */ {0xdd, 0x00, 0x0100}, /* 00,01,00,dd */ {0xbb, 0x01, 0x0400}, /* 04,01,00,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xbb, 0x41, 0x2803}, /* 28,41,03,bb */ {0xbb, 0x40, 0x2c03}, /* 2c,40,03,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0010}, /* 00,fe,10,aa */ {} }; static const struct usb_action adcm2700_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, /* 00,02,10,cc */ {0xa0, 0x00, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xa0, 0xd3, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,d3,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d0,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xd8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,d8,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xbb, 0x00, 0x0400}, /* 04,00,00,bb */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x0f, 0x140f}, /* 14,0f,0f,bb */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,37,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x58, ZC3XX_R116_RGAIN}, /* 01,16,58,cc */ {0xa0, 0x5a, ZC3XX_R118_BGAIN}, /* 01,18,5a,cc */ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xa0, 0xd3, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,d3,cc */ {0xbb, 0x00, 0x0408}, /* 04,00,08,bb */ {0xdd, 0x00, 0x0200}, /* 00,02,00,dd */ {0xbb, 0x00, 0x0400}, /* 04,00,00,bb */ {0xdd, 0x00, 0x0050}, /* 00,00,50,dd */ {0xbb, 0x0f, 0x140f}, /* 14,0f,0f,bb */ {0xbb, 0xe0, 0x0c2e}, /* 0c,e0,2e,bb */ {0xbb, 0x01, 0x2000}, /* 20,01,00,bb */ {0xbb, 0x96, 0x2400}, /* 24,96,00,bb */ {0xbb, 0x06, 0x1006}, /* 10,06,06,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x5f, 0x2090}, /* 20,5f,90,bb */ {0xbb, 0x01, 0x8000}, /* 80,01,00,bb */ {0xbb, 0x09, 0x8400}, /* 84,09,00,bb */ {0xbb, 0x86, 0x0002}, /* 00,88,02,bb */ {0xbb, 0xe6, 0x0401}, /* 04,e6,01,bb */ {0xbb, 0x86, 0x0802}, /* 08,88,02,bb */ {0xbb, 0xe6, 0x0c01}, /* 0c,e6,01,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 
0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0000}, /* 00,fe,00,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0020}, /* 00,fe,20,aa */ /*******/ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xaa, 0xfe, 0x0000}, /* 00,fe,00,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xdd, 0x00, 0x0010}, /* 00,00,10,dd */ {0xbb, 0x04, 0x0400}, /* 04,04,00,bb */ {0xdd, 0x00, 0x0100}, /* 00,01,00,dd */ {0xbb, 0x01, 0x0400}, /* 04,01,00,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xbb, 0x41, 0x2803}, /* 28,41,03,bb */ {0xbb, 0x40, 0x2c03}, /* 2c,40,03,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0010}, /* 00,fe,10,aa */ {} }; static const struct usb_action adcm2700_50HZ[] = { {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xbb, 0x05, 0x8400}, /* 84,05,00,bb */ {0xbb, 0xd0, 0xb007}, /* b0,d0,07,bb */ {0xbb, 0xa0, 0xb80f}, /* b8,a0,0f,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0010}, /* 00,fe,10,aa */ {0xaa, 0x26, 0x00d0}, /* 00,26,d0,aa */ {0xaa, 0x28, 0x0002}, /* 00,28,02,aa */ {} }; static const struct usb_action adcm2700_60HZ[] = { {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xbb, 0x07, 0x8400}, /* 84,07,00,bb */ {0xbb, 0x82, 0xb006}, /* b0,82,06,bb */ {0xbb, 0x04, 0xb80d}, /* b8,04,0d,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0010}, /* 00,fe,10,aa */ {0xaa, 0x26, 0x0057}, /* 00,26,57,aa */ {0xaa, 0x28, 0x0002}, /* 00,28,02,aa */ {} }; static const struct usb_action adcm2700_NoFlicker[] = { {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0002}, /* 00,fe,02,aa */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0a,cc */ {0xbb, 0x07, 0x8400}, /* 84,07,00,bb */ {0xbb, 0x05, 0xb000}, /* b0,05,00,bb */ {0xbb, 0xa0, 0xb801}, /* b8,a0,01,bb */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xaa, 0xfe, 0x0010}, /* 00,fe,10,aa */ {} }; static const struct usb_action cs2102_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x00, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x20, ZC3XX_R080_HBLANKHIGH}, {0xa0, 0x21, ZC3XX_R081_HBLANKLOW}, {0xa0, 0x30, ZC3XX_R083_RGAINADDR}, {0xa0, 0x31, ZC3XX_R084_GGAINADDR}, {0xa0, 0x32, ZC3XX_R085_BGAINADDR}, {0xa0, 0x23, ZC3XX_R086_EXPTIMEHIGH}, {0xa0, 0x24, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x25, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0xb3, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, 
ZC3XX_R11C_FIRSTXLOW}, {0xaa, 0x02, 0x0008}, {0xaa, 0x03, 0x0000}, {0xaa, 0x11, 0x0000}, {0xaa, 0x12, 0x0089}, {0xaa, 0x13, 0x0000}, {0xaa, 0x14, 0x00e9}, {0xaa, 0x20, 0x0000}, {0xaa, 0x22, 0x0000}, {0xaa, 0x0b, 0x0004}, {0xaa, 0x30, 0x0030}, {0xaa, 0x31, 0x0030}, {0xaa, 0x32, 0x0030}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x10, 0x01ae}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x68, ZC3XX_R18D_YTARGET}, {0xa0, 0x00, 0x01ad}, {} }; static const struct usb_action cs2102_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x00, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x20, ZC3XX_R080_HBLANKHIGH}, {0xa0, 0x21, ZC3XX_R081_HBLANKLOW}, {0xa0, 0x30, ZC3XX_R083_RGAINADDR}, {0xa0, 0x31, ZC3XX_R084_GGAINADDR}, {0xa0, 0x32, ZC3XX_R085_BGAINADDR}, {0xa0, 0x23, ZC3XX_R086_EXPTIMEHIGH}, {0xa0, 0x24, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x25, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0xb3, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xaa, 0x02, 0x0008}, {0xaa, 0x03, 0x0000}, {0xaa, 0x11, 0x0001}, {0xaa, 0x12, 0x0087}, {0xaa, 0x13, 0x0001}, {0xaa, 0x14, 0x00e7}, {0xaa, 0x20, 0x0000}, {0xaa, 0x22, 0x0000}, {0xaa, 0x0b, 0x0004}, {0xaa, 0x30, 0x0030}, {0xaa, 0x31, 0x0030}, {0xaa, 0x32, 0x0030}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x15, 0x01ae}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x68, ZC3XX_R18D_YTARGET}, {0xa0, 0x00, 0x01ad}, {} }; static const struct usb_action cs2102_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0001}, {0xaa, 0x24, 0x005f}, {0xaa, 0x25, 0x0090}, {0xaa, 0x21, 0x00dd}, {0xa0, 0x02, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xbf, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x20, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x3a, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x98, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xdd, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xe4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action cs2102_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0000}, {0xaa, 0x24, 0x00af}, {0xaa, 0x25, 0x00c8}, {0xaa, 0x21, 0x0068}, {0xa0, 0x01, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x5f, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x90, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x1d, 
ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x4c, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x68, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xe3, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action cs2102_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0001}, {0xaa, 0x24, 0x0055}, {0xaa, 0x25, 0x00cc}, {0xaa, 0x21, 0x003f}, {0xa0, 0x02, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xab, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x98, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x30, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0xd4, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x39, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x70, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xb0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action cs2102_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0000}, {0xaa, 0x24, 0x00aa}, {0xaa, 0x25, 0x00e6}, {0xaa, 0x21, 0x003f}, {0xa0, 0x01, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x55, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xcc, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x18, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x6a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x3f, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xa5, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action cs2102_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0001}, {0xaa, 0x24, 0x005f}, {0xaa, 0x25, 0x0000}, {0xaa, 0x21, 0x0001}, {0xa0, 0x02, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xbf, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x80, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x01, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x40, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xa0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action cs2102_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x23, 0x0000}, {0xaa, 0x24, 0x00af}, {0xaa, 0x25, 0x0080}, {0xaa, 0x21, 0x0001}, {0xa0, 0x01, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x5f, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x80, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x80, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x01, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x40, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xa0, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; /* CS2102_KOCOM */ static const struct usb_action cs2102K_InitialScale[] = { {0xa0, 0x11, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 
0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x55, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0a, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0b, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0c, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x7c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0d, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xa3, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x03, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xfb, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x05, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x06, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x03, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x09, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x08, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0e, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0f, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x10, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x11, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x12, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x15, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x16, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x17, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, 
{0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x01, 0x01b1}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x4c, ZC3XX_R118_BGAIN}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? */ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x13, ZC3XX_R120_GAMMA00}, /* gamma 4 */ {0xa0, 0x38, ZC3XX_R121_GAMMA01}, {0xa0, 0x59, ZC3XX_R122_GAMMA02}, {0xa0, 0x79, ZC3XX_R123_GAMMA03}, {0xa0, 0x92, ZC3XX_R124_GAMMA04}, {0xa0, 0xa7, ZC3XX_R125_GAMMA05}, {0xa0, 0xb9, ZC3XX_R126_GAMMA06}, {0xa0, 0xc8, ZC3XX_R127_GAMMA07}, {0xa0, 0xd4, ZC3XX_R128_GAMMA08}, {0xa0, 0xdf, ZC3XX_R129_GAMMA09}, {0xa0, 0xe7, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xee, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xf4, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xf9, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xfc, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x26, ZC3XX_R130_GAMMA10}, {0xa0, 0x22, ZC3XX_R131_GAMMA11}, {0xa0, 0x20, ZC3XX_R132_GAMMA12}, {0xa0, 0x1c, ZC3XX_R133_GAMMA13}, {0xa0, 0x16, ZC3XX_R134_GAMMA14}, {0xa0, 0x13, ZC3XX_R135_GAMMA15}, {0xa0, 0x10, ZC3XX_R136_GAMMA16}, {0xa0, 0x0d, ZC3XX_R137_GAMMA17}, {0xa0, 0x0b, ZC3XX_R138_GAMMA18}, {0xa0, 0x09, ZC3XX_R139_GAMMA19}, {0xa0, 0x07, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x06, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x05, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x04, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x03, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x02, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x58, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf4, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf4, ZC3XX_R10D_RGB10}, {0xa0, 0x58, ZC3XX_R10E_RGB11}, {0xa0, 0xf4, ZC3XX_R10F_RGB12}, {0xa0, 0xf4, ZC3XX_R110_RGB20}, {0xa0, 0xf4, ZC3XX_R111_RGB21}, {0xa0, 0x58, ZC3XX_R112_RGB22}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x22, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x22, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, {0xa0, 0x22, ZC3XX_R0A4_EXPOSURETIMELOW}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xee, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x0f, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x19, 
ZC3XX_R01F_HSYNC_2}, {0xa0, 0x1f, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x4c, ZC3XX_R118_BGAIN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x5c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x5c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x96, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x96, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {} }; static const struct usb_action cs2102K_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /*fixme: next sequence = i2c exchanges*/ {0xa0, 0x55, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0a, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, 
ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0b, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0c, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x7b, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0d, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xa3, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x03, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xfb, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x05, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x06, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x03, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x09, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x08, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0e, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x0f, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x10, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x11, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x12, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x18, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x15, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x16, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x17, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0xf7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x01, 0x01b1}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x4c, ZC3XX_R118_BGAIN}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? 
*/ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x13, ZC3XX_R120_GAMMA00}, /* gamma 4 */ {0xa0, 0x38, ZC3XX_R121_GAMMA01}, {0xa0, 0x59, ZC3XX_R122_GAMMA02}, {0xa0, 0x79, ZC3XX_R123_GAMMA03}, {0xa0, 0x92, ZC3XX_R124_GAMMA04}, {0xa0, 0xa7, ZC3XX_R125_GAMMA05}, {0xa0, 0xb9, ZC3XX_R126_GAMMA06}, {0xa0, 0xc8, ZC3XX_R127_GAMMA07}, {0xa0, 0xd4, ZC3XX_R128_GAMMA08}, {0xa0, 0xdf, ZC3XX_R129_GAMMA09}, {0xa0, 0xe7, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xee, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xf4, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xf9, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xfc, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x26, ZC3XX_R130_GAMMA10}, {0xa0, 0x22, ZC3XX_R131_GAMMA11}, {0xa0, 0x20, ZC3XX_R132_GAMMA12}, {0xa0, 0x1c, ZC3XX_R133_GAMMA13}, {0xa0, 0x16, ZC3XX_R134_GAMMA14}, {0xa0, 0x13, ZC3XX_R135_GAMMA15}, {0xa0, 0x10, ZC3XX_R136_GAMMA16}, {0xa0, 0x0d, ZC3XX_R137_GAMMA17}, {0xa0, 0x0b, ZC3XX_R138_GAMMA18}, {0xa0, 0x09, ZC3XX_R139_GAMMA19}, {0xa0, 0x07, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x06, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x05, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x04, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x03, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x02, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x58, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf4, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf4, ZC3XX_R10D_RGB10}, {0xa0, 0x58, ZC3XX_R10E_RGB11}, {0xa0, 0xf4, ZC3XX_R10F_RGB12}, {0xa0, 0xf4, ZC3XX_R110_RGB20}, {0xa0, 0xf4, ZC3XX_R111_RGB21}, {0xa0, 0x58, ZC3XX_R112_RGB22}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x22, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x22, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, {0xa0, 0x22, ZC3XX_R0A4_EXPOSURETIMELOW}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xee, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x0f, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x19, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x1f, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x4c, ZC3XX_R118_BGAIN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 
0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x5c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x5c, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x96, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x96, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, /*fixme:what does the next sequence?*/ {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xd0, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0xd0, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x01, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x02, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, 
ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0a, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x0a, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x44, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x44, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x20, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x21, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x7e, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x00, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x13, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x7e, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x14, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x02, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x18, ZC3XX_R092_I2CADDRESSSELECT}, {0xa0, 0x04, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x00, ZC3XX_R094_I2CWRITEACK}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {} }; static const struct usb_action gc0305_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x04, ZC3XX_R002_CLOCKSELECT}, /* 00,02,04,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ 
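/*
 * Entry format (best guess; these tables look like captures of the
 * vendor driver's USB traffic): the first byte of each usb_action seems
 * to select the transfer type - 0xa0 a bridge register write, 0xa1 a
 * bridge register read, 0xaa a sensor write through the bridge's I2C
 * unit, 0xdd a delay - followed by the value and the register index.
 * The "00,04,80,cc" / "00,13,02,aa" comments appear to echo the raw
 * bytes of each captured exchange.
 */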
{0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e6,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc */ {0xaa, 0x13, 0x0002}, /* 00,13,02,aa */ {0xaa, 0x15, 0x0003}, /* 00,15,03,aa */ {0xaa, 0x01, 0x0000}, /* 00,01,00,aa */ {0xaa, 0x02, 0x0000}, /* 00,02,00,aa */ {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa */ {0xaa, 0x1c, 0x0017}, /* 00,1c,17,aa */ {0xaa, 0x1d, 0x0080}, /* 00,1d,80,aa */ {0xaa, 0x1f, 0x0008}, /* 00,1f,08,aa */ {0xaa, 0x21, 0x0012}, /* 00,21,12,aa */ {0xa0, 0x82, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,82,cc */ {0xa0, 0x83, ZC3XX_R087_EXPTIMEMID}, /* 00,87,83,cc */ {0xa0, 0x84, ZC3XX_R088_EXPTIMELOW}, /* 00,88,84,cc */ {0xaa, 0x05, 0x0000}, /* 00,05,00,aa */ {0xaa, 0x0a, 0x0000}, /* 00,0a,00,aa */ {0xaa, 0x0b, 0x00b0}, /* 00,0b,b0,aa */ {0xaa, 0x0c, 0x0000}, /* 00,0c,00,aa */ {0xaa, 0x0d, 0x00b0}, /* 00,0d,b0,aa */ {0xaa, 0x0e, 0x0000}, /* 00,0e,00,aa */ {0xaa, 0x0f, 0x00b0}, /* 00,0f,b0,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x11, 0x00b0}, /* 00,11,b0,aa */ {0xaa, 0x16, 0x0001}, /* 00,16,01,aa */ {0xaa, 0x17, 0x00e6}, /* 00,17,e6,aa */ {0xaa, 0x18, 0x0002}, /* 00,18,02,aa */ {0xaa, 0x19, 0x0086}, /* 00,19,86,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x1b, 0x0020}, /* 00,1b,20,aa */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,b7,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x76, ZC3XX_R189_AWBSTATUS}, /* 01,89,76,cc */ {0xa0, 0x09, 0x01ad}, /* 01,ad,09,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,60,cc */ {0xa0, 0x85, ZC3XX_R18D_YTARGET}, /* 01,8d,85,cc */ {0xa0, 0x00, 0x011e}, /* 01,1e,00,cc */ {0xa0, 0x52, ZC3XX_R116_RGAIN}, /* 01,16,52,cc */ {0xa0, 0x40, ZC3XX_R117_GGAIN}, /* 01,17,40,cc */ {0xa0, 0x52, ZC3XX_R118_BGAIN}, /* 01,18,52,cc */ {0xa0, 0x03, ZC3XX_R113_RGB03}, /* 01,13,03,cc */ {} }; static const struct usb_action gc0305_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, /* 00,02,10,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 
01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e8,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc */ {0xaa, 0x13, 0x0000}, /* 00,13,00,aa */ {0xaa, 0x15, 0x0001}, /* 00,15,01,aa */ {0xaa, 0x01, 0x0000}, /* 00,01,00,aa */ {0xaa, 0x02, 0x0000}, /* 00,02,00,aa */ {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa */ {0xaa, 0x1c, 0x0017}, /* 00,1c,17,aa */ {0xaa, 0x1d, 0x0080}, /* 00,1d,80,aa */ {0xaa, 0x1f, 0x0008}, /* 00,1f,08,aa */ {0xaa, 0x21, 0x0012}, /* 00,21,12,aa */ {0xa0, 0x82, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,82,cc */ {0xa0, 0x83, ZC3XX_R087_EXPTIMEMID}, /* 00,87,83,cc */ {0xa0, 0x84, ZC3XX_R088_EXPTIMELOW}, /* 00,88,84,cc */ {0xaa, 0x05, 0x0000}, /* 00,05,00,aa */ {0xaa, 0x0a, 0x0000}, /* 00,0a,00,aa */ {0xaa, 0x0b, 0x00b0}, /* 00,0b,b0,aa */ {0xaa, 0x0c, 0x0000}, /* 00,0c,00,aa */ {0xaa, 0x0d, 0x00b0}, /* 00,0d,b0,aa */ {0xaa, 0x0e, 0x0000}, /* 00,0e,00,aa */ {0xaa, 0x0f, 0x00b0}, /* 00,0f,b0,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x11, 0x00b0}, /* 00,11,b0,aa */ {0xaa, 0x16, 0x0001}, /* 00,16,01,aa */ {0xaa, 0x17, 0x00e8}, /* 00,17,e8,aa */ {0xaa, 0x18, 0x0002}, /* 00,18,02,aa */ {0xaa, 0x19, 0x0088}, /* 00,19,88,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x1b, 0x0020}, /* 00,1b,20,aa */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,b7,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x76, ZC3XX_R189_AWBSTATUS}, /* 01,89,76,cc */ {0xa0, 0x09, 0x01ad}, /* 01,ad,09,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,60,cc */ {0xa0, 0x00, 0x011e}, /* 01,1e,00,cc */ {0xa0, 0x52, ZC3XX_R116_RGAIN}, /* 01,16,52,cc */ {0xa0, 0x40, ZC3XX_R117_GGAIN}, /* 01,17,40,cc */ {0xa0, 0x52, ZC3XX_R118_BGAIN}, /* 01,18,52,cc */ {0xa0, 0x03, ZC3XX_R113_RGB03}, /* 01,13,03,cc */ {} }; static const struct usb_action gc0305_50HZ[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0002}, /* 00,83,02,aa */ {0xaa, 0x84, 0x0038}, /* 00,84,38,aa */ /* win: 00,84,ec */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x0b, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,0b,cc */ {0xa0, 0x18, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,18,cc */ /* win: 01,92,10 */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x8e, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,8e,cc */ /* win: 01,97,ec */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,60,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ /* {0xa0, 0x85, ZC3XX_R18D_YTARGET}, * 01,8d,85,cc * * if 640x480 */ {} }; static const struct usb_action gc0305_60HZ[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, /* 00,83,00,aa */ {0xaa, 0x84, 0x00ec}, /* 
00,84,ec,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x0b, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,0b,cc */ {0xa0, 0x10, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,10,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0xec, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,ec,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,60,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ {0xa0, 0x80, ZC3XX_R18D_YTARGET}, /* 01,8d,80,cc */ {} }; static const struct usb_action gc0305_NoFlicker[] = { {0xa0, 0x0c, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0c,cc */ {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, /* 00,83,00,aa */ {0xaa, 0x84, 0x0020}, /* 00,84,20,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x00, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,00,cc */ {0xa0, 0x48, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,48,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,60,cc */ {0xa0, 0x03, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,03,cc */ {0xa0, 0x80, ZC3XX_R18D_YTARGET}, /* 01,8d,80,cc */ {} }; static const struct usb_action hdcs2020_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x11, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* qtable 0x05 */ {0xa0, 0x08, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xaa, 0x1c, 0x0000}, {0xaa, 0x0a, 0x0001}, {0xaa, 0x0b, 0x0006}, {0xaa, 0x0c, 0x007b}, {0xaa, 0x0d, 0x00a7}, {0xaa, 0x03, 0x00fb}, {0xaa, 0x05, 0x0000}, {0xaa, 0x06, 0x0003}, {0xaa, 0x09, 0x0008}, {0xaa, 0x0f, 0x0018}, /* set sensor gain */ {0xaa, 0x10, 0x0018}, {0xaa, 0x11, 0x0018}, {0xaa, 0x12, 0x0018}, {0xaa, 0x15, 0x004e}, {0xaa, 0x1c, 0x0004}, {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x70, ZC3XX_R18D_YTARGET}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa1, 0x01, 
0x0002}, {0xa1, 0x01, 0x0008}, {0xa1, 0x01, 0x0180}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? */ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x13, ZC3XX_R120_GAMMA00}, /* gamma 4 */ {0xa0, 0x38, ZC3XX_R121_GAMMA01}, {0xa0, 0x59, ZC3XX_R122_GAMMA02}, {0xa0, 0x79, ZC3XX_R123_GAMMA03}, {0xa0, 0x92, ZC3XX_R124_GAMMA04}, {0xa0, 0xa7, ZC3XX_R125_GAMMA05}, {0xa0, 0xb9, ZC3XX_R126_GAMMA06}, {0xa0, 0xc8, ZC3XX_R127_GAMMA07}, {0xa0, 0xd4, ZC3XX_R128_GAMMA08}, {0xa0, 0xdf, ZC3XX_R129_GAMMA09}, {0xa0, 0xe7, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xee, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xf4, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xf9, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xfc, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x26, ZC3XX_R130_GAMMA10}, {0xa0, 0x22, ZC3XX_R131_GAMMA11}, {0xa0, 0x20, ZC3XX_R132_GAMMA12}, {0xa0, 0x1c, ZC3XX_R133_GAMMA13}, {0xa0, 0x16, ZC3XX_R134_GAMMA14}, {0xa0, 0x13, ZC3XX_R135_GAMMA15}, {0xa0, 0x10, ZC3XX_R136_GAMMA16}, {0xa0, 0x0d, ZC3XX_R137_GAMMA17}, {0xa0, 0x0b, ZC3XX_R138_GAMMA18}, {0xa0, 0x09, ZC3XX_R139_GAMMA19}, {0xa0, 0x07, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x06, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x05, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x04, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x03, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x02, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x66, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xed, ZC3XX_R10B_RGB01}, {0xa0, 0xed, ZC3XX_R10C_RGB02}, {0xa0, 0xed, ZC3XX_R10D_RGB10}, {0xa0, 0x66, ZC3XX_R10E_RGB11}, {0xa0, 0xed, ZC3XX_R10F_RGB12}, {0xa0, 0xed, ZC3XX_R110_RGB20}, {0xa0, 0xed, ZC3XX_R111_RGB21}, {0xa0, 0x66, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x13, 0x0031}, {0xaa, 0x14, 0x0001}, {0xaa, 0x0e, 0x0004}, {0xaa, 0x19, 0x00cd}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x62, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3d, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 0x14 */ {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x18, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x2c, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x41, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action hdcs2020_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 
0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xaa, 0x1c, 0x0000}, {0xaa, 0x0a, 0x0001}, {0xaa, 0x0b, 0x0006}, {0xaa, 0x0c, 0x007a}, {0xaa, 0x0d, 0x00a7}, {0xaa, 0x03, 0x00fb}, {0xaa, 0x05, 0x0000}, {0xaa, 0x06, 0x0003}, {0xaa, 0x09, 0x0008}, {0xaa, 0x0f, 0x0018}, /* original setting */ {0xaa, 0x10, 0x0018}, {0xaa, 0x11, 0x0018}, {0xaa, 0x12, 0x0018}, {0xaa, 0x15, 0x004e}, {0xaa, 0x1c, 0x0004}, {0xa0, 0xf7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x70, ZC3XX_R18D_YTARGET}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa1, 0x01, 0x0002}, {0xa1, 0x01, 0x0008}, {0xa1, 0x01, 0x0180}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? */ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x13, ZC3XX_R120_GAMMA00}, /* gamma 4 */ {0xa0, 0x38, ZC3XX_R121_GAMMA01}, {0xa0, 0x59, ZC3XX_R122_GAMMA02}, {0xa0, 0x79, ZC3XX_R123_GAMMA03}, {0xa0, 0x92, ZC3XX_R124_GAMMA04}, {0xa0, 0xa7, ZC3XX_R125_GAMMA05}, {0xa0, 0xb9, ZC3XX_R126_GAMMA06}, {0xa0, 0xc8, ZC3XX_R127_GAMMA07}, {0xa0, 0xd4, ZC3XX_R128_GAMMA08}, {0xa0, 0xdf, ZC3XX_R129_GAMMA09}, {0xa0, 0xe7, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xee, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xf4, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xf9, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xfc, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x26, ZC3XX_R130_GAMMA10}, {0xa0, 0x22, ZC3XX_R131_GAMMA11}, {0xa0, 0x20, ZC3XX_R132_GAMMA12}, {0xa0, 0x1c, ZC3XX_R133_GAMMA13}, {0xa0, 0x16, ZC3XX_R134_GAMMA14}, {0xa0, 0x13, ZC3XX_R135_GAMMA15}, {0xa0, 0x10, ZC3XX_R136_GAMMA16}, {0xa0, 0x0d, ZC3XX_R137_GAMMA17}, {0xa0, 0x0b, ZC3XX_R138_GAMMA18}, {0xa0, 0x09, ZC3XX_R139_GAMMA19}, {0xa0, 0x07, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x06, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x05, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x04, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x03, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x02, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x66, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xed, ZC3XX_R10B_RGB01}, {0xa0, 0xed, ZC3XX_R10C_RGB02}, {0xa0, 0xed, ZC3XX_R10D_RGB10}, {0xa0, 0x66, ZC3XX_R10E_RGB11}, {0xa0, 0xed, ZC3XX_R10F_RGB12}, {0xa0, 0xed, ZC3XX_R110_RGB20}, {0xa0, 0xed, ZC3XX_R111_RGB21}, {0xa0, 0x66, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /**** set exposure ***/ {0xaa, 0x13, 0x0031}, {0xaa, 0x14, 0x0001}, {0xaa, 0x0e, 0x0004}, {0xaa, 0x19, 0x00cd}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x62, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3d, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x18, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x2c, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x41, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, 
ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action hdcs2020_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x13, 0x0018}, /* 00,13,18,aa */ {0xaa, 0x14, 0x0001}, /* 00,14,01,aa */ {0xaa, 0x0e, 0x0005}, /* 00,0e,05,aa */ {0xaa, 0x19, 0x001f}, /* 00,19,1f,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,02,cc */ {0xa0, 0x76, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,76,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x46, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,46,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,0c,cc */ {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,28,cc */ {0xa0, 0x05, ZC3XX_R01D_HSYNC_0}, /* 00,1d,05,cc */ {0xa0, 0x1a, ZC3XX_R01E_HSYNC_1}, /* 00,1e,1a,cc */ {0xa0, 0x2f, ZC3XX_R01F_HSYNC_2}, /* 00,1f,2f,cc */ {} }; static const struct usb_action hdcs2020_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x13, 0x0031}, /* 00,13,31,aa */ {0xaa, 0x14, 0x0001}, /* 00,14,01,aa */ {0xaa, 0x0e, 0x0004}, /* 00,0e,04,aa */ {0xaa, 0x19, 0x00cd}, /* 00,19,cd,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,02,cc */ {0xa0, 0x62, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,62,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x3d, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,3d,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x0c, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,0c,cc */ {0xa0, 0x28, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,28,cc */ {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, /* 00,1d,04,cc */ {0xa0, 0x18, ZC3XX_R01E_HSYNC_1}, /* 00,1e,18,cc */ {0xa0, 0x2c, ZC3XX_R01F_HSYNC_2}, /* 00,1f,2c,cc */ {} }; static const struct usb_action hdcs2020_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x13, 0x0010}, /* 00,13,10,aa */ {0xaa, 0x14, 0x0001}, /* 00,14,01,aa */ {0xaa, 0x0e, 0x0004}, /* 00,0e,04,aa */ {0xaa, 0x19, 0x0000}, /* 00,19,00,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,02,cc */ {0xa0, 0x70, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,70,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0x04, ZC3XX_R01D_HSYNC_0}, /* 00,1d,04,cc */ {0xa0, 0x17, ZC3XX_R01E_HSYNC_1}, /* 00,1e,17,cc */ {0xa0, 0x2a, ZC3XX_R01F_HSYNC_2}, /* 00,1f,2a,cc */ {} }; static const struct usb_action hv7131b_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x00, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, 
ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xaa, 0x30, 0x002d}, {0xaa, 0x01, 0x0005}, {0xaa, 0x11, 0x0000}, {0xaa, 0x13, 0x0001}, /* {0xaa, 0x13, 0x0000}, */ {0xaa, 0x14, 0x0001}, {0xaa, 0x15, 0x00e8}, {0xaa, 0x16, 0x0002}, {0xaa, 0x17, 0x0086}, /* 00,17,88,aa */ {0xaa, 0x31, 0x0038}, {0xaa, 0x32, 0x0038}, {0xaa, 0x33, 0x0038}, {0xaa, 0x5b, 0x0001}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x68, ZC3XX_R18D_YTARGET}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0xc0, 0x019b}, {0xa0, 0xa0, 0x019c}, {0xa0, 0x02, ZC3XX_R188_MINGAIN}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xaa, 0x02, 0x0090}, /* 00,02,80,aa */ {} }; static const struct usb_action hv7131b_Initial[] = { /* 640x480*/ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x00, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xaa, 0x30, 0x002d}, {0xaa, 0x01, 0x0005}, {0xaa, 0x11, 0x0001}, {0xaa, 0x13, 0x0000}, /* {0xaa, 0x13, 0x0001}; */ {0xaa, 0x14, 0x0001}, {0xaa, 0x15, 0x00e6}, {0xaa, 0x16, 0x0002}, {0xaa, 0x17, 0x0086}, {0xaa, 0x31, 0x0038}, {0xaa, 0x32, 0x0038}, {0xaa, 0x33, 0x0038}, {0xaa, 0x5b, 0x0001}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x70, ZC3XX_R18D_YTARGET}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0xc0, 0x019b}, {0xa0, 0xa0, 0x019c}, {0xa0, 0x02, ZC3XX_R188_MINGAIN}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xaa, 0x02, 0x0090}, /* {0xaa, 0x02, 0x0080}, */ {} }; static const struct usb_action hv7131b_50HZ[] = { /* 640x480*/ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0007}, /* 00,25,07,aa */ {0xaa, 0x26, 0x0053}, /* 00,26,53,aa */ {0xaa, 0x27, 0x0000}, /* 00,27,00,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x0050}, /* 00,21,50,aa */ {0xaa, 0x22, 0x001b}, /* 00,22,1b,aa */ {0xaa, 0x23, 0x00fc}, /* 00,23,fc,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0x9b, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,9b,cc */ {0xa0, 0x80, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,80,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0xea, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,ea,cc */ {0xa0, 0x60, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,60,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0c,cc */ {0xa0, 0x18, 
ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,18,cc */ {0xa0, 0x18, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,18,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0x50, ZC3XX_R01E_HSYNC_1}, /* 00,1e,50,cc */ {0xa0, 0x1b, ZC3XX_R01F_HSYNC_2}, /* 00,1f,1b,cc */ {0xa0, 0xfc, ZC3XX_R020_HSYNC_3}, /* 00,20,fc,cc */ {} }; static const struct usb_action hv7131b_50HZScale[] = { /* 320x240 */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0007}, /* 00,25,07,aa */ {0xaa, 0x26, 0x0053}, /* 00,26,53,aa */ {0xaa, 0x27, 0x0000}, /* 00,27,00,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x0050}, /* 00,21,50,aa */ {0xaa, 0x22, 0x0012}, /* 00,22,12,aa */ {0xaa, 0x23, 0x0080}, /* 00,23,80,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0x9b, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,9b,cc */ {0xa0, 0x80, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,80,cc */ {0xa0, 0x01, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,01,cc */ {0xa0, 0xd4, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,d4,cc */ {0xa0, 0xc0, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,c0,cc */ {0xa0, 0x07, ZC3XX_R18C_AEFREEZE}, /* 01,8c,07,cc */ {0xa0, 0x0f, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,0f,cc */ {0xa0, 0x18, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,18,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0x50, ZC3XX_R01E_HSYNC_1}, /* 00,1e,50,cc */ {0xa0, 0x12, ZC3XX_R01F_HSYNC_2}, /* 00,1f,12,cc */ {0xa0, 0x80, ZC3XX_R020_HSYNC_3}, /* 00,20,80,cc */ {} }; static const struct usb_action hv7131b_60HZ[] = { /* 640x480*/ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0007}, /* 00,25,07,aa */ {0xaa, 0x26, 0x00a1}, /* 00,26,a1,aa */ {0xaa, 0x27, 0x0020}, /* 00,27,20,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x0040}, /* 00,21,40,aa */ {0xaa, 0x22, 0x0013}, /* 00,22,13,aa */ {0xaa, 0x23, 0x004c}, /* 00,23,4c,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0x4d, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,4d,cc */ {0xa0, 0x60, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,60,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0xc3, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,c3,cc */ {0xa0, 0x50, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,50,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0c,cc */ {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,18,cc */ {0xa0, 0x18, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,18,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0x40, ZC3XX_R01E_HSYNC_1}, /* 00,1e,40,cc */ {0xa0, 0x13, ZC3XX_R01F_HSYNC_2}, /* 00,1f,13,cc */ {0xa0, 0x4c, ZC3XX_R020_HSYNC_3}, /* 00,20,4c,cc */ {} }; static const struct usb_action hv7131b_60HZScale[] = { /* 320x240 */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0007}, /* 00,25,07,aa */ {0xaa, 0x26, 0x00a1}, /* 00,26,a1,aa */ {0xaa, 0x27, 0x0020}, /* 00,27,20,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x00a0}, /* 00,21,a0,aa */ {0xaa, 0x22, 0x0016}, /* 00,22,16,aa */ {0xaa, 0x23, 0x0040}, /* 00,23,40,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0x4d, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,4d,cc */ {0xa0, 0x60, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,60,cc */ {0xa0, 0x01, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,01,cc */ {0xa0, 0x86, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,86,cc */ {0xa0, 0xa0, ZC3XX_R197_ANTIFLICKERLOW}, /* 
01,97,a0,cc */ {0xa0, 0x07, ZC3XX_R18C_AEFREEZE}, /* 01,8c,07,cc */ {0xa0, 0x0f, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,0f,cc */ {0xa0, 0x18, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,18,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0xa0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,a0,cc */ {0xa0, 0x16, ZC3XX_R01F_HSYNC_2}, /* 00,1f,16,cc */ {0xa0, 0x40, ZC3XX_R020_HSYNC_3}, /* 00,20,40,cc */ {} }; static const struct usb_action hv7131b_NoFlicker[] = { /* 640x480*/ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0003}, /* 00,25,03,aa */ {0xaa, 0x26, 0x0000}, /* 00,26,00,aa */ {0xaa, 0x27, 0x0000}, /* 00,27,00,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x0010}, /* 00,21,10,aa */ {0xaa, 0x22, 0x0000}, /* 00,22,00,aa */ {0xaa, 0x23, 0x0003}, /* 00,23,03,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0xf8, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,f8,cc */ {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,00,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x02, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,02,cc */ {0xa0, 0x00, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,00,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0x10, ZC3XX_R01E_HSYNC_1}, /* 00,1e,10,cc */ {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, /* 00,1f,00,cc */ {0xa0, 0x03, ZC3XX_R020_HSYNC_3}, /* 00,20,03,cc */ {} }; static const struct usb_action hv7131b_NoFlickerScale[] = { /* 320x240 */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x25, 0x0003}, /* 00,25,03,aa */ {0xaa, 0x26, 0x0000}, /* 00,26,00,aa */ {0xaa, 0x27, 0x0000}, /* 00,27,00,aa */ {0xaa, 0x20, 0x0000}, /* 00,20,00,aa */ {0xaa, 0x21, 0x00a0}, /* 00,21,a0,aa */ {0xaa, 0x22, 0x0016}, /* 00,22,16,aa */ {0xaa, 0x23, 0x0040}, /* 00,23,40,aa */ {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,2f,cc */ {0xa0, 0xf8, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,f8,cc */ {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,00,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x02, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,02,cc */ {0xa0, 0x00, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,00,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, /* 00,1d,00,cc */ {0xa0, 0xa0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,a0,cc */ {0xa0, 0x16, ZC3XX_R01F_HSYNC_2}, /* 00,1f,16,cc */ {0xa0, 0x40, ZC3XX_R020_HSYNC_3}, /* 00,20,40,cc */ {} }; /* from lPEPI264v.inf (hv7131b!) 
*/ static const struct usb_action hv7131r_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x000c}, {0xaa, 0x11, 0x0000}, {0xaa, 0x13, 0x0000}, {0xaa, 0x14, 0x0001}, {0xaa, 0x15, 0x00e8}, {0xaa, 0x16, 0x0002}, {0xaa, 0x17, 0x0088}, {0xaa, 0x30, 0x000b}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0xc0, 0x019b}, {0xa0, 0xa0, 0x019c}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {} }; static const struct usb_action hv7131r_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x000c}, {0xaa, 0x11, 0x0000}, {0xaa, 0x13, 0x0000}, {0xaa, 0x14, 0x0001}, {0xaa, 0x15, 0x00e6}, {0xaa, 0x16, 0x0002}, {0xaa, 0x17, 0x0086}, {0xaa, 0x30, 0x000b}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0xc0, 0x019b}, {0xa0, 0xa0, 0x019c}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {} }; static const struct usb_action hv7131r_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x06, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x68, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xa0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0xea, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x60, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x18, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, 
ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action hv7131r_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x0c, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xd1, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x40, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x01, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0xd4, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0xc0, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x18, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action hv7131r_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x06, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x1a, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x80, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0xc3, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x50, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x18, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action hv7131r_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x0c, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x35, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x01, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x86, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0xa0, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x18, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action hv7131r_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xf8, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x02, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x58, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action hv7131r_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xa0, 0x2f, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0xf8, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x04, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0xb0, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x00, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x00, ZC3XX_R01F_HSYNC_2}, {0xa0, 0x08, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action icm105a_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, 
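/*
 * As with the gc0305 and hv7131b tables above, the ...InitialScale
 * variant appears to program the scaled 320x240 mode and ...Initial the
 * full 640x480 one: the early difference is ZC3XX_R002_CLOCKSELECT
 * (0x10 here versus 0x00 in icm105a_Initial) plus the slightly smaller
 * capture window set further down.
 */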
{0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0c, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x00, ZC3XX_R097_WINYSTARTHIGH}, {0xa0, 0x01, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R099_WINXSTARTHIGH}, {0xa0, 0x01, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x01, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x01, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xaa, 0x01, 0x0010}, {0xaa, 0x03, 0x0000}, {0xaa, 0x04, 0x0001}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0001}, {0xaa, 0x04, 0x0011}, {0xaa, 0x05, 0x00a0}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0002}, {0xaa, 0x04, 0x0013}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0003}, {0xaa, 0x04, 0x0015}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0004}, {0xaa, 0x04, 0x0017}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x000d}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0005}, {0xaa, 0x04, 0x0019}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0006}, {0xaa, 0x04, 0x0017}, {0xaa, 0x05, 0x0026}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0007}, {0xaa, 0x04, 0x0019}, {0xaa, 0x05, 0x0022}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0008}, {0xaa, 0x04, 0x0021}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0009}, {0xaa, 0x04, 0x0023}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x000d}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000a}, {0xaa, 0x04, 0x0025}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000b}, {0xaa, 0x04, 0x00ec}, {0xaa, 0x05, 0x002e}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000c}, {0xaa, 0x04, 0x00fa}, {0xaa, 0x05, 0x002a}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x07, 0x000d}, {0xaa, 0x01, 0x0005}, {0xaa, 0x94, 0x0002}, {0xaa, 0x90, 0x0000}, {0xaa, 0x91, 0x001f}, {0xaa, 0x10, 0x0064}, {0xaa, 0x9b, 0x00f0}, {0xaa, 0x9c, 0x0002}, {0xaa, 0x14, 0x001a}, {0xaa, 0x20, 0x0080}, {0xaa, 0x22, 0x0080}, {0xaa, 0x24, 0x0080}, {0xaa, 0x26, 0x0080}, {0xaa, 0x00, 0x0084}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xaa, 0xa8, 0x00c0}, {0xa1, 0x01, 0x0002}, {0xa1, 0x01, 0x0008}, {0xa1, 0x01, 0x0180}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? 
*/ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x52, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf7, ZC3XX_R10B_RGB01}, {0xa0, 0xf7, ZC3XX_R10C_RGB02}, {0xa0, 0xf7, ZC3XX_R10D_RGB10}, {0xa0, 0x52, ZC3XX_R10E_RGB11}, {0xa0, 0xf7, ZC3XX_R10F_RGB12}, {0xa0, 0xf7, ZC3XX_R110_RGB20}, {0xa0, 0xf7, ZC3XX_R111_RGB21}, {0xa0, 0x52, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x0d, 0x0003}, {0xaa, 0x0c, 0x008c}, {0xaa, 0x0e, 0x0095}, {0xaa, 0x0f, 0x0002}, {0xaa, 0x1c, 0x0094}, {0xaa, 0x1d, 0x0002}, {0xaa, 0x20, 0x0080}, {0xaa, 0x22, 0x0080}, {0xaa, 0x24, 0x0080}, {0xaa, 0x26, 0x0080}, {0xaa, 0x00, 0x0084}, {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, {0xa0, 0x94, ZC3XX_R0A4_EXPOSURETIMELOW}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x20, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x84, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xe3, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xec, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf5, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0xc0, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0xc0, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action icm105a_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0c, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x00, ZC3XX_R097_WINYSTARTHIGH}, {0xa0, 0x02, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R099_WINXSTARTHIGH}, {0xa0, 0x02, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x02, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x02, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xaa, 0x01, 0x0010}, {0xaa, 0x03, 0x0000}, {0xaa, 0x04, 0x0001}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0001}, {0xaa, 0x04, 0x0011}, {0xaa, 0x05, 0x00a0}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0002}, {0xaa, 0x04, 0x0013}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0001}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0003}, {0xaa, 0x04, 0x0015}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0004}, {0xaa, 0x04, 0x0017}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x000d}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0005}, {0xa0, 0x04, ZC3XX_R092_I2CADDRESSSELECT}, 
{0xa0, 0x19, ZC3XX_R093_I2CSETVALUE}, {0xa0, 0x01, ZC3XX_R090_I2CCOMMAND}, {0xa1, 0x01, 0x0091}, {0xaa, 0x05, 0x0020}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0006}, {0xaa, 0x04, 0x0017}, {0xaa, 0x05, 0x0026}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0007}, {0xaa, 0x04, 0x0019}, {0xaa, 0x05, 0x0022}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0008}, {0xaa, 0x04, 0x0021}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x0009}, {0xaa, 0x04, 0x0023}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x000d}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000a}, {0xaa, 0x04, 0x0025}, {0xaa, 0x05, 0x00aa}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000b}, {0xaa, 0x04, 0x00ec}, {0xaa, 0x05, 0x002e}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x03, 0x000c}, {0xaa, 0x04, 0x00fa}, {0xaa, 0x05, 0x002a}, {0xaa, 0x06, 0x0005}, {0xaa, 0x08, 0x0000}, {0xaa, 0x07, 0x000d}, {0xaa, 0x01, 0x0005}, {0xaa, 0x94, 0x0002}, {0xaa, 0x90, 0x0000}, {0xaa, 0x91, 0x0010}, {0xaa, 0x10, 0x0064}, {0xaa, 0x9b, 0x00f0}, {0xaa, 0x9c, 0x0002}, {0xaa, 0x14, 0x001a}, {0xaa, 0x20, 0x0080}, {0xaa, 0x22, 0x0080}, {0xaa, 0x24, 0x0080}, {0xaa, 0x26, 0x0080}, {0xaa, 0x00, 0x0084}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xaa, 0xa8, 0x0080}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, {0xa1, 0x01, 0x0002}, {0xa1, 0x01, 0x0008}, {0xa1, 0x01, 0x0180}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? */ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x52, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf7, ZC3XX_R10B_RGB01}, {0xa0, 0xf7, ZC3XX_R10C_RGB02}, {0xa0, 0xf7, ZC3XX_R10D_RGB10}, {0xa0, 0x52, ZC3XX_R10E_RGB11}, {0xa0, 0xf7, ZC3XX_R10F_RGB12}, {0xa0, 0xf7, ZC3XX_R110_RGB20}, {0xa0, 0xf7, ZC3XX_R111_RGB21}, {0xa0, 0x52, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x0d, 0x0003}, {0xaa, 0x0c, 0x0020}, {0xaa, 0x0e, 0x000e}, {0xaa, 0x0f, 0x0002}, {0xaa, 0x1c, 0x000d}, {0xaa, 0x1d, 0x0002}, {0xaa, 0x20, 0x0080}, {0xaa, 0x22, 0x0080}, {0xaa, 0x24, 0x0080}, {0xaa, 0x26, 0x0080}, {0xaa, 0x00, 0x0084}, {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, {0xa0, 0x0d, ZC3XX_R0A4_EXPOSURETIMELOW}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x1a, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x4b, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xc8, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xd8, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xea, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action icm105a_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x0020}, /* 
00,0c,20,aa */ {0xaa, 0x0e, 0x000e}, /* 00,0e,0e,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x000d}, /* 00,1c,0d,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x0d, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,0d,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x1a, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,1a,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x4b, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,4b,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,12,cc */ {0xa0, 0xc8, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c8,cc */ {0xa0, 0xd8, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d8,cc */ {0xa0, 0xea, ZC3XX_R01F_HSYNC_2}, /* 00,1f,ea,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {} }; static const struct usb_action icm105a_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x008c}, /* 00,0c,8c,aa */ {0xaa, 0x0e, 0x0095}, /* 00,0e,95,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x0094}, /* 00,1c,94,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x94, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,94,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x20, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,20,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x84, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,84,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,12,cc */ {0xa0, 0xe3, ZC3XX_R01D_HSYNC_0}, /* 00,1d,e3,cc */ {0xa0, 0xec, ZC3XX_R01E_HSYNC_1}, /* 00,1e,ec,cc */ {0xa0, 0xf5, ZC3XX_R01F_HSYNC_2}, /* 00,1f,f5,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, /* 01,a7,00,cc */ {0xa0, 0xc0, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,c0,cc */ {} }; static const struct usb_action icm105a_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x0004}, /* 00,0c,04,aa */ {0xaa, 0x0e, 0x000d}, /* 00,0e,0d,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x0008}, /* 00,1c,08,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x08, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 
00,a4,08,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x10, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,10,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x41, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,41,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,12,cc */ {0xa0, 0xc1, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c1,cc */ {0xa0, 0xd4, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d4,cc */ {0xa0, 0xe8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {} }; static const struct usb_action icm105a_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x0008}, /* 00,0c,08,aa */ {0xaa, 0x0e, 0x0086}, /* 00,0e,86,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x0085}, /* 00,1c,85,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x85, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,85,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x08, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,08,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x81, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,81,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x12, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,12,cc */ {0xa0, 0xc2, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c2,cc */ {0xa0, 0xd6, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d6,cc */ {0xa0, 0xea, ZC3XX_R01F_HSYNC_2}, /* 00,1f,ea,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, /* 01,a7,00,cc */ {0xa0, 0xc0, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,c0,cc */ {} }; static const struct usb_action icm105a_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x0004}, /* 00,0c,04,aa */ {0xaa, 0x0e, 0x000d}, /* 00,0e,0d,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x0000}, /* 00,1c,00,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x00, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,00,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x20, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,20,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc 
*/ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0xc1, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c1,cc */ {0xa0, 0xd4, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d4,cc */ {0xa0, 0xe8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {} }; static const struct usb_action icm105a_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0x0d, 0x0003}, /* 00,0d,03,aa */ {0xaa, 0x0c, 0x0004}, /* 00,0c,04,aa */ {0xaa, 0x0e, 0x0081}, /* 00,0e,81,aa */ {0xaa, 0x0f, 0x0002}, /* 00,0f,02,aa */ {0xaa, 0x1c, 0x0080}, /* 00,1c,80,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x20, 0x0080}, /* 00,20,80,aa */ {0xaa, 0x22, 0x0080}, /* 00,22,80,aa */ {0xaa, 0x24, 0x0080}, /* 00,24,80,aa */ {0xaa, 0x26, 0x0080}, /* 00,26,80,aa */ {0xaa, 0x00, 0x0084}, /* 00,00,84,aa */ {0xa0, 0x02, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,02,cc */ {0xa0, 0x80, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,80,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x20, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,20,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0xc1, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c1,cc */ {0xa0, 0xd4, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d4,cc */ {0xa0, 0xe8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN}, /* 01,a7,00,cc */ {0xa0, 0xc0, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,c0,cc */ {} }; static const struct usb_action mc501cb_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, /* 00,02,00,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd8, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d8,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, /* 00,9b,01,cc */ {0xa0, 0xde, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,de,cc */ {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, /* 00,9d,02,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xa0, 0x33, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,33,cc */ {0xa0, 0x34, ZC3XX_R087_EXPTIMEMID}, /* 00,87,34,cc */ {0xa0, 0x35, ZC3XX_R088_EXPTIMELOW}, /* 00,88,35,cc */ {0xa0, 0xb0, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,b0,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xaa, 0x01, 0x0001}, /* 00,01,01,aa */ {0xaa, 0x01, 0x0003}, /* 00,01,03,aa */ {0xaa, 0x01, 0x0001}, /* 00,01,01,aa */ {0xaa, 0x03, 0x0000}, /* 00,03,00,aa */ {0xaa, 0x10, 0x0000}, /* 
00,10,00,aa */ {0xaa, 0x11, 0x0080}, /* 00,11,80,aa */ {0xaa, 0x12, 0x0000}, /* 00,12,00,aa */ {0xaa, 0x13, 0x0000}, /* 00,13,00,aa */ {0xaa, 0x14, 0x0000}, /* 00,14,00,aa */ {0xaa, 0x15, 0x0000}, /* 00,15,00,aa */ {0xaa, 0x16, 0x0000}, /* 00,16,00,aa */ {0xaa, 0x17, 0x0001}, /* 00,17,01,aa */ {0xaa, 0x18, 0x00de}, /* 00,18,de,aa */ {0xaa, 0x19, 0x0002}, /* 00,19,02,aa */ {0xaa, 0x1a, 0x0086}, /* 00,1a,86,aa */ {0xaa, 0x20, 0x00a8}, /* 00,20,a8,aa */ {0xaa, 0x22, 0x0000}, /* 00,22,00,aa */ {0xaa, 0x23, 0x0000}, /* 00,23,00,aa */ {0xaa, 0x24, 0x0000}, /* 00,24,00,aa */ {0xaa, 0x40, 0x0033}, /* 00,40,33,aa */ {0xaa, 0x41, 0x0077}, /* 00,41,77,aa */ {0xaa, 0x42, 0x0053}, /* 00,42,53,aa */ {0xaa, 0x43, 0x00b0}, /* 00,43,b0,aa */ {0xaa, 0x4b, 0x0001}, /* 00,4b,01,aa */ {0xaa, 0x72, 0x0020}, /* 00,72,20,aa */ {0xaa, 0x73, 0x0000}, /* 00,73,00,aa */ {0xaa, 0x80, 0x0000}, /* 00,80,00,aa */ {0xaa, 0x85, 0x0050}, /* 00,85,50,aa */ {0xaa, 0x91, 0x0070}, /* 00,91,70,aa */ {0xaa, 0x92, 0x0072}, /* 00,92,72,aa */ {0xaa, 0x03, 0x0001}, /* 00,03,01,aa */ {0xaa, 0x10, 0x00a0}, /* 00,10,a0,aa */ {0xaa, 0x11, 0x0001}, /* 00,11,01,aa */ {0xaa, 0x30, 0x0000}, /* 00,30,00,aa */ {0xaa, 0x60, 0x0000}, /* 00,60,00,aa */ {0xaa, 0xa0, 0x001a}, /* 00,a0,1a,aa */ {0xaa, 0xa1, 0x0000}, /* 00,a1,00,aa */ {0xaa, 0xa2, 0x003f}, /* 00,a2,3f,aa */ {0xaa, 0xa3, 0x0028}, /* 00,a3,28,aa */ {0xaa, 0xa4, 0x0010}, /* 00,a4,10,aa */ {0xaa, 0xa5, 0x0020}, /* 00,a5,20,aa */ {0xaa, 0xb1, 0x0044}, /* 00,b1,44,aa */ {0xaa, 0xd0, 0x0001}, /* 00,d0,01,aa */ {0xaa, 0xd1, 0x0085}, /* 00,d1,85,aa */ {0xaa, 0xd2, 0x0080}, /* 00,d2,80,aa */ {0xaa, 0xd3, 0x0080}, /* 00,d3,80,aa */ {0xaa, 0xd4, 0x0080}, /* 00,d4,80,aa */ {0xaa, 0xd5, 0x0080}, /* 00,d5,80,aa */ {0xaa, 0xc0, 0x00c3}, /* 00,c0,c3,aa */ {0xaa, 0xc2, 0x0044}, /* 00,c2,44,aa */ {0xaa, 0xc4, 0x0040}, /* 00,c4,40,aa */ {0xaa, 0xc5, 0x0020}, /* 00,c5,20,aa */ {0xaa, 0xc6, 0x0008}, /* 00,c6,08,aa */ {0xaa, 0x03, 0x0004}, /* 00,03,04,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x40, 0x0030}, /* 00,40,30,aa */ {0xaa, 0x41, 0x0020}, /* 00,41,20,aa */ {0xaa, 0x42, 0x002d}, /* 00,42,2d,aa */ {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x1c, 0x0050}, /* 00,1C,50,aa */ {0xaa, 0x11, 0x0081}, /* 00,11,81,aa */ {0xaa, 0x3b, 0x001d}, /* 00,3b,1D,aa */ {0xaa, 0x3c, 0x004c}, /* 00,3c,4C,aa */ {0xaa, 0x3d, 0x0018}, /* 00,3d,18,aa */ {0xaa, 0x3e, 0x006a}, /* 00,3e,6A,aa */ {0xaa, 0x01, 0x0000}, /* 00,01,00,aa */ {0xaa, 0x52, 0x00ff}, /* 00,52,FF,aa */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,37,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xaa, 0x03, 0x0002}, /* 00,03,02,aa */ {0xaa, 0x51, 0x0027}, /* 00,51,27,aa */ {0xaa, 0x52, 0x0020}, /* 00,52,20,aa */ {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x50, 0x0010}, /* 00,50,10,aa */ {0xaa, 0x51, 0x0010}, /* 00,51,10,aa */ {0xaa, 0x54, 0x0010}, /* 00,54,10,aa */ {0xaa, 0x55, 0x0010}, /* 00,55,10,aa */ {0xa0, 0xf0, 0x0199}, /* 01,99,F0,cc */ {0xa0, 0x80, 0x019a}, /* 01,9A,80,cc */ {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x001d}, /* 00,36,1D,aa */ {0xaa, 0x37, 
0x004c}, /* 00,37,4C,aa */ {0xaa, 0x3b, 0x001d}, /* 00,3B,1D,aa */ {} }; static const struct usb_action mc501cb_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, /* 00,02,10,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d0,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, /* 00,9b,01,cc */ {0xa0, 0xd8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,d8,cc */ {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, /* 00,9d,02,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xa0, 0x33, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,33,cc */ {0xa0, 0x34, ZC3XX_R087_EXPTIMEMID}, /* 00,87,34,cc */ {0xa0, 0x35, ZC3XX_R088_EXPTIMELOW}, /* 00,88,35,cc */ {0xa0, 0xb0, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,b0,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xaa, 0x01, 0x0001}, /* 00,01,01,aa */ {0xaa, 0x01, 0x0003}, /* 00,01,03,aa */ {0xaa, 0x01, 0x0001}, /* 00,01,01,aa */ {0xaa, 0x03, 0x0000}, /* 00,03,00,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x11, 0x0080}, /* 00,11,80,aa */ {0xaa, 0x12, 0x0000}, /* 00,12,00,aa */ {0xaa, 0x13, 0x0000}, /* 00,13,00,aa */ {0xaa, 0x14, 0x0000}, /* 00,14,00,aa */ {0xaa, 0x15, 0x0000}, /* 00,15,00,aa */ {0xaa, 0x16, 0x0000}, /* 00,16,00,aa */ {0xaa, 0x17, 0x0001}, /* 00,17,01,aa */ {0xaa, 0x18, 0x00d8}, /* 00,18,d8,aa */ {0xaa, 0x19, 0x0002}, /* 00,19,02,aa */ {0xaa, 0x1a, 0x0088}, /* 00,1a,88,aa */ {0xaa, 0x20, 0x00a8}, /* 00,20,a8,aa */ {0xaa, 0x22, 0x0000}, /* 00,22,00,aa */ {0xaa, 0x23, 0x0000}, /* 00,23,00,aa */ {0xaa, 0x24, 0x0000}, /* 00,24,00,aa */ {0xaa, 0x40, 0x0033}, /* 00,40,33,aa */ {0xaa, 0x41, 0x0077}, /* 00,41,77,aa */ {0xaa, 0x42, 0x0053}, /* 00,42,53,aa */ {0xaa, 0x43, 0x00b0}, /* 00,43,b0,aa */ {0xaa, 0x4b, 0x0001}, /* 00,4b,01,aa */ {0xaa, 0x72, 0x0020}, /* 00,72,20,aa */ {0xaa, 0x73, 0x0000}, /* 00,73,00,aa */ {0xaa, 0x80, 0x0000}, /* 00,80,00,aa */ {0xaa, 0x85, 0x0050}, /* 00,85,50,aa */ {0xaa, 0x91, 0x0070}, /* 00,91,70,aa */ {0xaa, 0x92, 0x0072}, /* 00,92,72,aa */ {0xaa, 0x03, 0x0001}, /* 00,03,01,aa */ {0xaa, 0x10, 0x00a0}, /* 00,10,a0,aa */ {0xaa, 0x11, 0x0001}, /* 00,11,01,aa */ {0xaa, 0x30, 0x0000}, /* 00,30,00,aa */ {0xaa, 0x60, 0x0000}, /* 00,60,00,aa */ {0xaa, 0xa0, 0x001a}, /* 00,a0,1a,aa */ {0xaa, 0xa1, 0x0000}, /* 00,a1,00,aa */ {0xaa, 0xa2, 0x003f}, /* 00,a2,3f,aa */ {0xaa, 0xa3, 0x0028}, /* 00,a3,28,aa */ {0xaa, 0xa4, 0x0010}, /* 00,a4,10,aa */ {0xaa, 0xa5, 0x0020}, /* 00,a5,20,aa */ {0xaa, 0xb1, 0x0044}, /* 00,b1,44,aa */ {0xaa, 0xd0, 0x0001}, /* 00,d0,01,aa */ {0xaa, 0xd1, 0x0085}, /* 00,d1,85,aa */ {0xaa, 0xd2, 0x0080}, /* 00,d2,80,aa */ {0xaa, 0xd3, 0x0080}, /* 00,d3,80,aa */ {0xaa, 0xd4, 0x0080}, /* 00,d4,80,aa */ {0xaa, 0xd5, 0x0080}, /* 00,d5,80,aa */ {0xaa, 0xc0, 0x00c3}, /* 00,c0,c3,aa */ {0xaa, 0xc2, 0x0044}, /* 00,c2,44,aa */ {0xaa, 0xc4, 0x0040}, /* 
00,c4,40,aa */ {0xaa, 0xc5, 0x0020}, /* 00,c5,20,aa */ {0xaa, 0xc6, 0x0008}, /* 00,c6,08,aa */ {0xaa, 0x03, 0x0004}, /* 00,03,04,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x40, 0x0030}, /* 00,40,30,aa */ {0xaa, 0x41, 0x0020}, /* 00,41,20,aa */ {0xaa, 0x42, 0x002d}, /* 00,42,2d,aa */ {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x1c, 0x0050}, /* 00,1c,50,aa */ {0xaa, 0x11, 0x0081}, /* 00,11,81,aa */ {0xaa, 0x3b, 0x003a}, /* 00,3b,3A,aa */ {0xaa, 0x3c, 0x0098}, /* 00,3c,98,aa */ {0xaa, 0x3d, 0x0030}, /* 00,3d,30,aa */ {0xaa, 0x3e, 0x00d4}, /* 00,3E,D4,aa */ {0xaa, 0x01, 0x0000}, /* 00,01,00,aa */ {0xaa, 0x52, 0x00ff}, /* 00,52,FF,aa */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,37,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xaa, 0x03, 0x0002}, /* 00,03,02,aa */ {0xaa, 0x51, 0x004e}, /* 00,51,4E,aa */ {0xaa, 0x52, 0x0041}, /* 00,52,41,aa */ {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x50, 0x0010}, /* 00,50,10,aa */ {0xaa, 0x51, 0x0010}, /* 00,51,10,aa */ {0xaa, 0x54, 0x0010}, /* 00,54,10,aa */ {0xaa, 0x55, 0x0010}, /* 00,55,10,aa */ {0xa0, 0xf0, 0x0199}, /* 01,99,F0,cc */ {0xa0, 0x80, 0x019a}, /* 01,9A,80,cc */ {} }; static const struct usb_action mc501cb_50HZ[] = { {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x001d}, /* 00,36,1D,aa */ {0xaa, 0x37, 0x004c}, /* 00,37,4C,aa */ {0xaa, 0x3b, 0x001d}, /* 00,3B,1D,aa */ {0xaa, 0x3c, 0x004c}, /* 00,3C,4C,aa */ {0xaa, 0x3d, 0x001d}, /* 00,3D,1D,aa */ {0xaa, 0x3e, 0x004c}, /* 00,3E,4C,aa */ {} }; static const struct usb_action mc501cb_50HZScale[] = { {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x003a}, /* 00,36,3A,aa */ {0xaa, 0x37, 0x0098}, /* 00,37,98,aa */ {0xaa, 0x3b, 0x003a}, /* 00,3B,3A,aa */ {0xaa, 0x3c, 0x0098}, /* 00,3C,98,aa */ {0xaa, 0x3d, 0x003a}, /* 00,3D,3A,aa */ {0xaa, 0x3e, 0x0098}, /* 00,3E,98,aa */ {} }; static const struct usb_action mc501cb_60HZ[] = { {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x0018}, /* 00,36,18,aa */ {0xaa, 0x37, 0x006a}, /* 00,37,6A,aa */ {0xaa, 0x3d, 0x0018}, /* 00,3D,18,aa */ {0xaa, 0x3e, 0x006a}, /* 00,3E,6A,aa */ {0xaa, 0x3b, 0x0018}, /* 00,3B,18,aa */ {0xaa, 0x3c, 0x006a}, /* 00,3C,6A,aa */ {} }; static const struct usb_action mc501cb_60HZScale[] = { {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x0030}, /* 00,36,30,aa */ {0xaa, 0x37, 0x00d4}, /* 00,37,D4,aa */ {0xaa, 0x3d, 0x0030}, /* 00,3D,30,aa */ {0xaa, 0x3e, 0x00d4}, /* 00,3E,D4,aa */ {0xaa, 0x3b, 0x0030}, /* 00,3B,30,aa */ {0xaa, 0x3c, 0x00d4}, /* 00,3C,D4,aa */ {} }; static const struct usb_action mc501cb_NoFlicker[] = { {0xaa, 0x03, 0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x0018}, /* 00,36,18,aa */ {0xaa, 0x37, 0x006a}, /* 00,37,6A,aa */ {0xaa, 0x3d, 0x0018}, /* 00,3D,18,aa */ {0xaa, 0x3e, 0x006a}, /* 00,3E,6A,aa */ {0xaa, 0x3b, 0x0018}, /* 00,3B,18,aa */ {0xaa, 0x3c, 0x006a}, /* 00,3C,6A,aa */ {} }; static const struct usb_action mc501cb_NoFlickerScale[] = { {0xaa, 0x03, 
0x0003}, /* 00,03,03,aa */ {0xaa, 0x10, 0x00fc}, /* 00,10,fc,aa */ {0xaa, 0x36, 0x0030}, /* 00,36,30,aa */ {0xaa, 0x37, 0x00d4}, /* 00,37,D4,aa */ {0xaa, 0x3d, 0x0030}, /* 00,3D,30,aa */ {0xaa, 0x3e, 0x00d4}, /* 00,3E,D4,aa */ {0xaa, 0x3b, 0x0030}, /* 00,3B,30,aa */ {0xaa, 0x3c, 0x00d4}, /* 00,3C,D4,aa */ {} }; /* from zs211.inf */ static const struct usb_action ov7620_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x40, ZC3XX_R002_CLOCKSELECT}, /* 00,02,40,cc */ {0xa0, 0x00, ZC3XX_R008_CLOCKSETTING}, /* 00,08,00,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x06, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,06,cc */ {0xa0, 0x02, ZC3XX_R083_RGAINADDR}, /* 00,83,02,cc */ {0xa0, 0x01, ZC3XX_R085_BGAINADDR}, /* 00,85,01,cc */ {0xa0, 0x80, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,80,cc */ {0xa0, 0x81, ZC3XX_R087_EXPTIMEMID}, /* 00,87,81,cc */ {0xa0, 0x10, ZC3XX_R088_EXPTIMELOW}, /* 00,88,10,cc */ {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,a1,cc */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* 00,8d,08,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd8, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d8,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xde, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,de,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xaa, 0x12, 0x0088}, /* 00,12,88,aa */ {0xaa, 0x12, 0x0048}, /* 00,12,48,aa */ {0xaa, 0x75, 0x008a}, /* 00,75,8a,aa */ {0xaa, 0x13, 0x00a3}, /* 00,13,a3,aa */ {0xaa, 0x04, 0x0000}, /* 00,04,00,aa */ {0xaa, 0x05, 0x0000}, /* 00,05,00,aa */ {0xaa, 0x14, 0x0000}, /* 00,14,00,aa */ {0xaa, 0x15, 0x0004}, /* 00,15,04,aa */ {0xaa, 0x17, 0x0018}, /* 00,17,18,aa */ {0xaa, 0x18, 0x00ba}, /* 00,18,ba,aa */ {0xaa, 0x19, 0x0002}, /* 00,19,02,aa */ {0xaa, 0x1a, 0x00f1}, /* 00,1a,f1,aa */ {0xaa, 0x20, 0x0040}, /* 00,20,40,aa */ {0xaa, 0x24, 0x0088}, /* 00,24,88,aa */ {0xaa, 0x25, 0x0078}, /* 00,25,78,aa */ {0xaa, 0x27, 0x00f6}, /* 00,27,f6,aa */ {0xaa, 0x28, 0x00a0}, /* 00,28,a0,aa */ {0xaa, 0x21, 0x0000}, /* 00,21,00,aa */ {0xaa, 0x2a, 0x0083}, /* 00,2a,83,aa */ {0xaa, 0x2b, 0x0096}, /* 00,2b,96,aa */ {0xaa, 0x2d, 0x0005}, /* 00,2d,05,aa */ {0xaa, 0x74, 0x0020}, /* 00,74,20,aa */ {0xaa, 0x61, 0x0068}, /* 00,61,68,aa */ {0xaa, 0x64, 0x0088}, /* 00,64,88,aa */ {0xaa, 0x00, 0x0000}, /* 00,00,00,aa */ {0xaa, 0x06, 0x0080}, /* 00,06,80,aa */ {0xaa, 0x01, 0x0090}, /* 00,01,90,aa */ {0xaa, 0x02, 0x0030}, /* 00,02,30,aa */ {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,77,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x68, ZC3XX_R116_RGAIN}, /* 01,16,68,cc */ {0xa0, 0x52, ZC3XX_R118_BGAIN}, /* 01,18,52,cc */ {0xa0, 0x40, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,40,cc 
*/ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,50,cc */ {} }; static const struct usb_action ov7620_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x50, ZC3XX_R002_CLOCKSELECT}, /* 00,02,50,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,00,cc */ /* mx change? */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x06, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,06,cc */ {0xa0, 0x02, ZC3XX_R083_RGAINADDR}, /* 00,83,02,cc */ {0xa0, 0x01, ZC3XX_R085_BGAINADDR}, /* 00,85,01,cc */ {0xa0, 0x80, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,80,cc */ {0xa0, 0x81, ZC3XX_R087_EXPTIMEMID}, /* 00,87,81,cc */ {0xa0, 0x10, ZC3XX_R088_EXPTIMELOW}, /* 00,88,10,cc */ {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,a1,cc */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* 00,8d,08,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xd0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,d0,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xd6, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,d6,cc */ /* OV7648 00,9c,d8,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xaa, 0x12, 0x0088}, /* 00,12,88,aa */ {0xaa, 0x12, 0x0048}, /* 00,12,48,aa */ {0xaa, 0x75, 0x008a}, /* 00,75,8a,aa */ {0xaa, 0x13, 0x00a3}, /* 00,13,a3,aa */ {0xaa, 0x04, 0x0000}, /* 00,04,00,aa */ {0xaa, 0x05, 0x0000}, /* 00,05,00,aa */ {0xaa, 0x14, 0x0000}, /* 00,14,00,aa */ {0xaa, 0x15, 0x0004}, /* 00,15,04,aa */ {0xaa, 0x24, 0x0088}, /* 00,24,88,aa */ {0xaa, 0x25, 0x0078}, /* 00,25,78,aa */ {0xaa, 0x17, 0x0018}, /* 00,17,18,aa */ {0xaa, 0x18, 0x00ba}, /* 00,18,ba,aa */ {0xaa, 0x19, 0x0002}, /* 00,19,02,aa */ {0xaa, 0x1a, 0x00f2}, /* 00,1a,f2,aa */ {0xaa, 0x20, 0x0040}, /* 00,20,40,aa */ {0xaa, 0x27, 0x00f6}, /* 00,27,f6,aa */ {0xaa, 0x28, 0x00a0}, /* 00,28,a0,aa */ {0xaa, 0x21, 0x0000}, /* 00,21,00,aa */ {0xaa, 0x2a, 0x0083}, /* 00,2a,83,aa */ {0xaa, 0x2b, 0x0096}, /* 00,2b,96,aa */ {0xaa, 0x2d, 0x0005}, /* 00,2d,05,aa */ {0xaa, 0x74, 0x0020}, /* 00,74,20,aa */ {0xaa, 0x61, 0x0068}, /* 00,61,68,aa */ {0xaa, 0x64, 0x0088}, /* 00,64,88,aa */ {0xaa, 0x00, 0x0000}, /* 00,00,00,aa */ {0xaa, 0x06, 0x0080}, /* 00,06,80,aa */ {0xaa, 0x01, 0x0090}, /* 00,01,90,aa */ {0xaa, 0x02, 0x0030}, /* 00,02,30,aa */ {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,77,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x68, ZC3XX_R116_RGAIN}, /* 01,16,68,cc */ {0xa0, 0x52, ZC3XX_R118_BGAIN}, /* 01,18,52,cc */ {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,50,cc */ {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,50,cc */ {} }; static const struct usb_action 
ov7620_50HZ[] = { {0xdd, 0x00, 0x0100}, /* 00,01,00,dd */ {0xaa, 0x2b, 0x0096}, /* 00,2b,96,aa */ /* enable 1/120s & 1/100s exposures for banding filter */ {0xaa, 0x75, 0x008e}, {0xaa, 0x2d, 0x0005}, /* 00,2d,05,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x18, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,18,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x83, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,83,cc */ {0xaa, 0x76, 0x0003}, /* 00,76,03,aa */ /* {0xa0, 0x40, ZC3XX_R002_CLOCKSELECT}, * 00,02,40,cc * if mode0 (640x480) */ {} }; static const struct usb_action ov7620_60HZ[] = { {0xdd, 0x00, 0x0100}, /* 00,01,00,dd */ {0xaa, 0x2b, 0x0000}, /* 00,2b,00,aa */ /* enable 1/120s & 1/100s exposures for banding filter */ {0xaa, 0x75, 0x008e}, {0xaa, 0x2d, 0x0005}, /* 00,2d,05,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x18, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,18,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x83, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,83,cc */ {0xaa, 0x76, 0x0003}, /* 00,76,03,aa */ /* {0xa0, 0x40, ZC3XX_R002_CLOCKSELECT}, * 00,02,40,cc * if mode0 (640x480) */ /* ?? in gspca v1, it was {0xa0, 0x00, 0x0039}, * 00,00,00,dd * {0xa1, 0x01, 0x0037}, */ {} }; static const struct usb_action ov7620_NoFlicker[] = { {0xdd, 0x00, 0x0100}, /* 00,01,00,dd */ {0xaa, 0x2b, 0x0000}, /* 00,2b,00,aa */ /* disable 1/120s & 1/100s exposures for banding filter */ {0xaa, 0x75, 0x008a}, {0xaa, 0x2d, 0x0001}, /* 00,2d,01,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,04,cc */ {0xa0, 0x18, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,18,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x01, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,01,cc */ /* {0xa0, 0x44, ZC3XX_R002_CLOCKSELECT}, * 00,02,44,cc * if mode1 (320x240) */ /* ?? 
was {0xa0, 0x00, 0x0039}, * 00,00,00,dd * {0xa1, 0x01, 0x0037}, */ {} }; static const struct usb_action ov7630c_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x06, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x12, 0x0080}, {0xa0, 0x02, ZC3XX_R083_RGAINADDR}, {0xa0, 0x01, ZC3XX_R085_BGAINADDR}, {0xa0, 0x90, ZC3XX_R086_EXPTIMEHIGH}, {0xa0, 0x91, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x10, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xd8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xaa, 0x12, 0x0069}, {0xaa, 0x04, 0x0020}, {0xaa, 0x06, 0x0050}, {0xaa, 0x13, 0x0083}, {0xaa, 0x14, 0x0000}, {0xaa, 0x15, 0x0024}, {0xaa, 0x17, 0x0018}, {0xaa, 0x18, 0x00ba}, {0xaa, 0x19, 0x0002}, {0xaa, 0x1a, 0x00f6}, {0xaa, 0x1b, 0x0002}, {0xaa, 0x20, 0x00c2}, {0xaa, 0x24, 0x0060}, {0xaa, 0x25, 0x0040}, {0xaa, 0x26, 0x0030}, {0xaa, 0x27, 0x00ea}, {0xaa, 0x28, 0x00a0}, {0xaa, 0x21, 0x0000}, {0xaa, 0x2a, 0x0081}, {0xaa, 0x2b, 0x0096}, {0xaa, 0x2d, 0x0094}, {0xaa, 0x2f, 0x003d}, {0xaa, 0x30, 0x0024}, {0xaa, 0x60, 0x0000}, {0xaa, 0x61, 0x0040}, {0xaa, 0x68, 0x007c}, {0xaa, 0x6f, 0x0015}, {0xaa, 0x75, 0x0088}, {0xaa, 0x77, 0x00b5}, {0xaa, 0x01, 0x0060}, {0xaa, 0x02, 0x0060}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x46, ZC3XX_R118_BGAIN}, {0xa0, 0x04, ZC3XX_R113_RGB03}, /* 0x10, */ {0xa1, 0x01, 0x0002}, {0xa0, 0x50, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf8, ZC3XX_R10B_RGB01}, {0xa0, 0xf8, ZC3XX_R10C_RGB02}, {0xa0, 0xf8, ZC3XX_R10D_RGB10}, {0xa0, 0x50, ZC3XX_R10E_RGB11}, {0xa0, 0xf8, ZC3XX_R10F_RGB12}, {0xa0, 0xf8, ZC3XX_R110_RGB20}, {0xa0, 0xf8, ZC3XX_R111_RGB21}, {0xa0, 0x50, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? 
*/ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x01, ZC3XX_R120_GAMMA00}, /* gamma 2 ?*/ {0xa0, 0x0c, ZC3XX_R121_GAMMA01}, {0xa0, 0x1f, ZC3XX_R122_GAMMA02}, {0xa0, 0x3a, ZC3XX_R123_GAMMA03}, {0xa0, 0x53, ZC3XX_R124_GAMMA04}, {0xa0, 0x6d, ZC3XX_R125_GAMMA05}, {0xa0, 0x85, ZC3XX_R126_GAMMA06}, {0xa0, 0x9c, ZC3XX_R127_GAMMA07}, {0xa0, 0xb0, ZC3XX_R128_GAMMA08}, {0xa0, 0xc2, ZC3XX_R129_GAMMA09}, {0xa0, 0xd1, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xde, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xe9, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xf2, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xf9, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x05, ZC3XX_R130_GAMMA10}, {0xa0, 0x0f, ZC3XX_R131_GAMMA11}, {0xa0, 0x16, ZC3XX_R132_GAMMA12}, {0xa0, 0x1a, ZC3XX_R133_GAMMA13}, {0xa0, 0x19, ZC3XX_R134_GAMMA14}, {0xa0, 0x19, ZC3XX_R135_GAMMA15}, {0xa0, 0x17, ZC3XX_R136_GAMMA16}, {0xa0, 0x15, ZC3XX_R137_GAMMA17}, {0xa0, 0x12, ZC3XX_R138_GAMMA18}, {0xa0, 0x10, ZC3XX_R139_GAMMA19}, {0xa0, 0x0e, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x0b, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x09, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x08, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x06, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x03, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x50, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf8, ZC3XX_R10B_RGB01}, {0xa0, 0xf8, ZC3XX_R10C_RGB02}, {0xa0, 0xf8, ZC3XX_R10D_RGB10}, {0xa0, 0x50, ZC3XX_R10E_RGB11}, {0xa0, 0xf8, ZC3XX_R10F_RGB12}, {0xa0, 0xf8, ZC3XX_R110_RGB20}, {0xa0, 0xf8, ZC3XX_R111_RGB21}, {0xa0, 0x50, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xaa, 0x10, 0x001b}, {0xaa, 0x76, 0x0002}, {0xaa, 0x2a, 0x0081}, {0xaa, 0x2b, 0x0000}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x01, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xb8, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x37, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x26, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R180_AUTOCORRECTENABLE}, {0xaa, 0x13, 0x0083}, /* 40 */ {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action ov7630c_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x06, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0xa1, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x12, 0x0080}, {0xa0, 0x02, ZC3XX_R083_RGAINADDR}, {0xa0, 0x01, ZC3XX_R085_BGAINADDR}, {0xa0, 0x90, ZC3XX_R086_EXPTIMEHIGH}, {0xa0, 0x91, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x10, ZC3XX_R088_EXPTIMELOW}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, {0xaa, 0x12, 0x0069}, /* i2c */ {0xaa, 0x04, 0x0020}, {0xaa, 0x06, 0x0050}, {0xaa, 0x13, 0x00c3}, {0xaa, 0x14, 0x0000}, {0xaa, 0x15, 0x0024}, {0xaa, 0x19, 0x0003}, {0xaa, 0x1a, 0x00f6}, {0xaa, 0x1b, 
0x0002}, {0xaa, 0x20, 0x00c2}, {0xaa, 0x24, 0x0060}, {0xaa, 0x25, 0x0040}, {0xaa, 0x26, 0x0030}, {0xaa, 0x27, 0x00ea}, {0xaa, 0x28, 0x00a0}, {0xaa, 0x21, 0x0000}, {0xaa, 0x2a, 0x0081}, {0xaa, 0x2b, 0x0096}, {0xaa, 0x2d, 0x0084}, {0xaa, 0x2f, 0x003d}, {0xaa, 0x30, 0x0024}, {0xaa, 0x60, 0x0000}, {0xaa, 0x61, 0x0040}, {0xaa, 0x68, 0x007c}, {0xaa, 0x6f, 0x0015}, {0xaa, 0x75, 0x0088}, {0xaa, 0x77, 0x00b5}, {0xaa, 0x01, 0x0060}, {0xaa, 0x02, 0x0060}, {0xaa, 0x17, 0x0018}, {0xaa, 0x18, 0x00ba}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x77, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x04, ZC3XX_R1A7_CALCGLOBALMEAN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R116_RGAIN}, {0xa0, 0x46, ZC3XX_R118_BGAIN}, {0xa0, 0x04, ZC3XX_R113_RGB03}, {0xa1, 0x01, 0x0002}, {0xa0, 0x4e, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xfe, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf7, ZC3XX_R10D_RGB10}, {0xa0, 0x4d, ZC3XX_R10E_RGB11}, {0xa0, 0xfc, ZC3XX_R10F_RGB12}, {0xa0, 0x00, ZC3XX_R110_RGB20}, {0xa0, 0xf6, ZC3XX_R111_RGB21}, {0xa0, 0x4a, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0008}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* clock ? */ {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, /* sharpness+ */ {0xa1, 0x01, 0x01c8}, {0xa1, 0x01, 0x01c9}, {0xa1, 0x01, 0x01ca}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* sharpness- */ {0xa0, 0x16, ZC3XX_R120_GAMMA00}, /* gamma ~4 */ {0xa0, 0x3a, ZC3XX_R121_GAMMA01}, {0xa0, 0x5b, ZC3XX_R122_GAMMA02}, {0xa0, 0x7c, ZC3XX_R123_GAMMA03}, {0xa0, 0x94, ZC3XX_R124_GAMMA04}, {0xa0, 0xa9, ZC3XX_R125_GAMMA05}, {0xa0, 0xbb, ZC3XX_R126_GAMMA06}, {0xa0, 0xca, ZC3XX_R127_GAMMA07}, {0xa0, 0xd7, ZC3XX_R128_GAMMA08}, {0xa0, 0xe1, ZC3XX_R129_GAMMA09}, {0xa0, 0xea, ZC3XX_R12A_GAMMA0A}, {0xa0, 0xf1, ZC3XX_R12B_GAMMA0B}, {0xa0, 0xf7, ZC3XX_R12C_GAMMA0C}, {0xa0, 0xfc, ZC3XX_R12D_GAMMA0D}, {0xa0, 0xff, ZC3XX_R12E_GAMMA0E}, {0xa0, 0xff, ZC3XX_R12F_GAMMA0F}, {0xa0, 0x20, ZC3XX_R130_GAMMA10}, {0xa0, 0x22, ZC3XX_R131_GAMMA11}, {0xa0, 0x20, ZC3XX_R132_GAMMA12}, {0xa0, 0x1c, ZC3XX_R133_GAMMA13}, {0xa0, 0x16, ZC3XX_R134_GAMMA14}, {0xa0, 0x13, ZC3XX_R135_GAMMA15}, {0xa0, 0x10, ZC3XX_R136_GAMMA16}, {0xa0, 0x0d, ZC3XX_R137_GAMMA17}, {0xa0, 0x0b, ZC3XX_R138_GAMMA18}, {0xa0, 0x09, ZC3XX_R139_GAMMA19}, {0xa0, 0x07, ZC3XX_R13A_GAMMA1A}, {0xa0, 0x06, ZC3XX_R13B_GAMMA1B}, {0xa0, 0x05, ZC3XX_R13C_GAMMA1C}, {0xa0, 0x04, ZC3XX_R13D_GAMMA1D}, {0xa0, 0x00, ZC3XX_R13E_GAMMA1E}, {0xa0, 0x01, ZC3XX_R13F_GAMMA1F}, {0xa0, 0x4e, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xfe, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf7, ZC3XX_R10D_RGB10}, {0xa0, 0x4d, ZC3XX_R10E_RGB11}, {0xa0, 0xfc, ZC3XX_R10F_RGB12}, {0xa0, 0x00, ZC3XX_R110_RGB20}, {0xa0, 0xf6, ZC3XX_R111_RGB21}, {0xa0, 0x4a, ZC3XX_R112_RGB22}, {0xa1, 0x01, 0x0180}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xaa, 0x10, 0x000d}, {0xaa, 0x76, 0x0002}, {0xaa, 0x2a, 0x0081}, {0xaa, 0x2b, 0x0000}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x00, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xd8, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x1b, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x26, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, 
{0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x40, ZC3XX_R180_AUTOCORRECTENABLE}, {0xaa, 0x13, 0x00c3}, {0xa1, 0x01, 0x0180}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action pas106b_Initial_com[] = { /* Sream and Sensor specific */ {0xa1, 0x01, 0x0010}, /* CMOSSensorSelect */ /* System */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* SystemControl */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* SystemControl */ /* Picture size */ {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, /* ClockSelect */ {0xa0, 0x03, 0x003a}, {0xa0, 0x0c, 0x003b}, {0xa0, 0x04, 0x0038}, {} }; static const struct usb_action pas106b_InitialScale[] = { /* 176x144 */ /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* Sream and Sensor specific */ {0xa0, 0x0f, ZC3XX_R010_CMOSSENSORSELECT}, /* Picture size */ {0xa0, 0x00, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0xb0, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x00, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0x90, ZC3XX_R006_FRAMEHEIGHTLOW}, /* System */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* Sream and Sensor specific */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* Sensor Interface */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* Window inside sensor array */ {0xa0, 0x03, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x03, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x28, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x68, ZC3XX_R09E_WINWIDTHLOW}, /* Init the sensor */ {0xaa, 0x02, 0x0004}, {0xaa, 0x08, 0x0000}, {0xaa, 0x09, 0x0005}, {0xaa, 0x0a, 0x0002}, {0xaa, 0x0b, 0x0002}, {0xaa, 0x0c, 0x0005}, {0xaa, 0x0d, 0x0000}, {0xaa, 0x0e, 0x0002}, {0xaa, 0x14, 0x0081}, /* Other registers */ {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, /* Frame retrieving */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* Gains */ {0xa0, 0xa0, ZC3XX_R1A8_DIGITALGAIN}, /* Unknown */ {0xa0, 0x00, 0x01ad}, /* Sharpness */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* Other registers */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* Auto exposure and white balance */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /*Dead pixels */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* EEPROM */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* Other registers */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* Auto exposure and white balance */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /*Dead pixels */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* EEPROM */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x58, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf4, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf4, ZC3XX_R10D_RGB10}, {0xa0, 0x58, ZC3XX_R10E_RGB11}, {0xa0, 0xf4, ZC3XX_R10F_RGB12}, {0xa0, 0xf4, ZC3XX_R110_RGB20}, {0xa0, 0xf4, ZC3XX_R111_RGB21}, {0xa0, 0x58, ZC3XX_R112_RGB22}, /* Auto correction */ {0xa0, 0x03, ZC3XX_R181_WINXSTART}, {0xa0, 0x08, ZC3XX_R182_WINXWIDTH}, {0xa0, 0x16, ZC3XX_R183_WINXCENTER}, {0xa0, 0x03, ZC3XX_R184_WINYSTART}, {0xa0, 0x05, ZC3XX_R185_WINYWIDTH}, {0xa0, 0x14, ZC3XX_R186_WINYCENTER}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, /* Auto exposure and white balance */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x03, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xb1, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, 
ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x87, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, /* sensor on */ {0xaa, 0x07, 0x00b1}, {0xaa, 0x05, 0x0003}, {0xaa, 0x04, 0x0001}, {0xaa, 0x03, 0x003b}, /* Gains */ {0xa0, 0x20, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x26, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xa0, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, /* Auto correction */ {0xa0, 0x40, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, /* AutoCorrectEnable */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* Gains */ {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action pas106b_Initial[] = { /* 352x288 */ /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* Sream and Sensor specific */ {0xa0, 0x0f, ZC3XX_R010_CMOSSENSORSELECT}, /* Picture size */ {0xa0, 0x01, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x60, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0x20, ZC3XX_R006_FRAMEHEIGHTLOW}, /* System */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* Sream and Sensor specific */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* Sensor Interface */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* Window inside sensor array */ {0xa0, 0x03, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x03, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0x28, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x68, ZC3XX_R09E_WINWIDTHLOW}, /* Init the sensor */ {0xaa, 0x02, 0x0004}, {0xaa, 0x08, 0x0000}, {0xaa, 0x09, 0x0005}, {0xaa, 0x0a, 0x0002}, {0xaa, 0x0b, 0x0002}, {0xaa, 0x0c, 0x0005}, {0xaa, 0x0d, 0x0000}, {0xaa, 0x0e, 0x0002}, {0xaa, 0x14, 0x0081}, /* Other registers */ {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, /* Frame retrieving */ {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* Gains */ {0xa0, 0xa0, ZC3XX_R1A8_DIGITALGAIN}, /* Unknown */ {0xa0, 0x00, 0x01ad}, /* Sharpness */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* Other registers */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* Auto exposure and white balance */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x80, ZC3XX_R18D_YTARGET}, /*Dead pixels */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* EEPROM */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, /* Other registers */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* Auto exposure and white balance */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /*Dead pixels */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* EEPROM */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* JPEG control */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x08, ZC3XX_R1C6_SHARPNESS00}, {0xa0, 0x0f, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x58, ZC3XX_R10A_RGB00}, /* matrix */ {0xa0, 0xf4, ZC3XX_R10B_RGB01}, {0xa0, 0xf4, ZC3XX_R10C_RGB02}, {0xa0, 0xf4, ZC3XX_R10D_RGB10}, {0xa0, 0x58, ZC3XX_R10E_RGB11}, {0xa0, 0xf4, ZC3XX_R10F_RGB12}, {0xa0, 0xf4, ZC3XX_R110_RGB20}, {0xa0, 0xf4, ZC3XX_R111_RGB21}, {0xa0, 0x58, ZC3XX_R112_RGB22}, /* Auto correction */ {0xa0, 0x03, ZC3XX_R181_WINXSTART}, {0xa0, 0x08, ZC3XX_R182_WINXWIDTH}, {0xa0, 0x16, ZC3XX_R183_WINXCENTER}, {0xa0, 0x03, ZC3XX_R184_WINYSTART}, {0xa0, 0x05, ZC3XX_R185_WINYWIDTH}, {0xa0, 0x14, ZC3XX_R186_WINYCENTER}, {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, /* Auto exposure and white balance */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x03, 
ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xb1, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x87, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* sensor on */ {0xaa, 0x07, 0x00b1}, {0xaa, 0x05, 0x0003}, {0xaa, 0x04, 0x0001}, {0xaa, 0x03, 0x003b}, /* Gains */ {0xa0, 0x20, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x26, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, /* Auto correction */ {0xa0, 0x40, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa1, 0x01, 0x0180}, /* AutoCorrectEnable */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* Gains */ {0xa0, 0x40, ZC3XX_R116_RGAIN}, {0xa0, 0x40, ZC3XX_R117_GGAIN}, {0xa0, 0x40, ZC3XX_R118_BGAIN}, {0xa0, 0x00, 0x0007}, /* AutoCorrectEnable */ {0xa0, 0xff, ZC3XX_R018_FRAMELOST}, /* Frame adjust */ {} }; static const struct usb_action pas106b_50HZ[] = { {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x06, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,06,cc */ {0xa0, 0x54, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,54,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x87, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,87,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x30, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,30,cc */ {0xaa, 0x03, 0x0021}, /* 00,03,21,aa */ {0xaa, 0x04, 0x000c}, /* 00,04,0c,aa */ {0xaa, 0x05, 0x0002}, /* 00,05,02,aa */ {0xaa, 0x07, 0x001c}, /* 00,07,1c,aa */ {0xa0, 0x04, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,04,cc */ {} }; static const struct usb_action pas106b_60HZ[] = { {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x06, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,06,cc */ {0xa0, 0x2e, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,2e,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x71, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,71,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x30, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,30,cc */ {0xaa, 0x03, 0x001c}, /* 00,03,1c,aa */ {0xaa, 0x04, 0x0004}, /* 00,04,04,aa */ {0xaa, 0x05, 0x0001}, /* 00,05,01,aa */ {0xaa, 0x07, 0x00c4}, /* 00,07,c4,aa */ {0xa0, 0x04, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,04,cc */ {} }; static const struct usb_action pas106b_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x06, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,06,cc */ {0xa0, 0x50, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,50,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xaa, 0x03, 0x0013}, /* 00,03,13,aa */ {0xaa, 0x04, 0x0000}, /* 00,04,00,aa */ {0xaa, 0x05, 0x0001}, /* 00,05,01,aa */ {0xaa, 0x07, 0x0030}, /* 00,07,30,aa */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {} }; /* from lvWIMv.inf 046d:08a2/:08aa 2007/06/03 */ static const struct usb_action pas202b_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0e, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0e,cc */ {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, /* 00,02,00,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, 
/* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* 00,8d,08,cc */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x03, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,03,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x03, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,03,cc */ {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, /* 00,9b,01,cc */ {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e6,cc */ {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, /* 00,9d,02,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xaa, 0x02, 0x0002}, /* 00,02,04,aa --> 02 */ {0xaa, 0x07, 0x0006}, /* 00,07,06,aa */ {0xaa, 0x08, 0x0002}, /* 00,08,02,aa */ {0xaa, 0x09, 0x0006}, /* 00,09,06,aa */ {0xaa, 0x0a, 0x0001}, /* 00,0a,01,aa */ {0xaa, 0x0b, 0x0001}, /* 00,0b,01,aa */ {0xaa, 0x0c, 0x0006}, {0xaa, 0x0d, 0x0000}, /* 00,0d,00,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x12, 0x0005}, /* 00,12,05,aa */ {0xaa, 0x13, 0x0063}, /* 00,13,63,aa */ {0xaa, 0x15, 0x0070}, /* 00,15,70,aa */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,b7,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x70, ZC3XX_R18D_YTARGET}, /* 01,8d,70,cc */ {} }; static const struct usb_action pas202b_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0e, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,0e,cc */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, /* 00,02,10,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x08, ZC3XX_R08D_COMPABILITYMODE}, /* 00,8d,08,cc */ {0xa0, 0x08, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,08,cc */ {0xa0, 0x02, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,02,cc */ {0xa0, 0x08, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,08,cc */ {0xa0, 0x02, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,02,cc */ {0xa0, 0x01, ZC3XX_R09B_WINHEIGHTHIGH}, /* 00,9b,01,cc */ {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, /* 00,9d,02,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xaa, 0x02, 0x0002}, /* 00,02,02,aa */ {0xaa, 0x07, 0x0006}, /* 00,07,06,aa */ {0xaa, 0x08, 0x0002}, /* 00,08,02,aa */ {0xaa, 0x09, 0x0006}, /* 00,09,06,aa */ {0xaa, 0x0a, 0x0001}, /* 00,0a,01,aa */ {0xaa, 0x0b, 0x0001}, /* 00,0b,01,aa */ {0xaa, 0x0c, 0x0006}, {0xaa, 0x0d, 0x0000}, /* 00,0d,00,aa */ {0xaa, 0x10, 0x0000}, /* 00,10,00,aa */ {0xaa, 0x12, 0x0005}, /* 00,12,05,aa */ {0xaa, 0x13, 0x0063}, /* 00,13,63,aa */ {0xaa, 0x15, 0x0070}, /* 00,15,70,aa */ {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,37,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc 
*/ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x70, ZC3XX_R18D_YTARGET}, /* 01,8d,70,cc */ {0xa0, 0xff, ZC3XX_R097_WINYSTARTHIGH}, {0xa0, 0xfe, ZC3XX_R098_WINYSTARTLOW}, {} }; static const struct usb_action pas202b_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 00,88,21,cc */ {0xaa, 0x20, 0x0002}, /* 00,20,02,aa */ {0xaa, 0x21, 0x001b}, {0xaa, 0x03, 0x0044}, /* 00,03,44,aa */ {0xaa, 0x04, 0x0008}, {0xaa, 0x05, 0x001b}, {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x1c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x1b, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x4d, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,4d,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1b, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x44, ZC3XX_R01D_HSYNC_0}, /* 00,1d,44,cc */ {0xa0, 0x6f, ZC3XX_R01E_HSYNC_1}, /* 00,1e,6f,cc */ {0xa0, 0xad, ZC3XX_R01F_HSYNC_2}, /* 00,1f,ad,cc */ {0xa0, 0xeb, ZC3XX_R020_HSYNC_3}, /* 00,20,eb,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; static const struct usb_action pas202b_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 00,88,21,cc */ {0xaa, 0x20, 0x0004}, {0xaa, 0x21, 0x003d}, {0xaa, 0x03, 0x0041}, /* 00,03,41,aa */ {0xaa, 0x04, 0x0010}, {0xaa, 0x05, 0x003d}, {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x1c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x3d, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x9b, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,9b,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1b, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x41, ZC3XX_R01D_HSYNC_0}, /* 00,1d,41,cc */ {0xa0, 0x6f, ZC3XX_R01E_HSYNC_1}, /* 00,1e,6f,cc */ {0xa0, 0xad, ZC3XX_R01F_HSYNC_2}, /* 00,1f,ad,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; static const struct usb_action pas202b_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 00,88,21,cc */ {0xaa, 0x20, 0x0002}, /* 00,20,02,aa */ {0xaa, 0x21, 0x0000}, /* 00,21,00,aa */ {0xaa, 0x03, 0x0045}, /* 00,03,45,aa */ {0xaa, 0x04, 0x0008}, /* 00,04,08,aa */ {0xaa, 0x05, 0x0000}, /* 00,05,00,aa */ {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x1c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, 
ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x40, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,40,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1b, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x45, ZC3XX_R01D_HSYNC_0}, /* 00,1d,45,cc */ {0xa0, 0x8e, ZC3XX_R01E_HSYNC_1}, /* 00,1e,8e,cc */ {0xa0, 0xc1, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c1,cc */ {0xa0, 0xf5, ZC3XX_R020_HSYNC_3}, /* 00,20,f5,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; static const struct usb_action pas202b_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 00,88,21,cc */ {0xaa, 0x20, 0x0004}, {0xaa, 0x21, 0x0008}, {0xaa, 0x03, 0x0042}, /* 00,03,42,aa */ {0xaa, 0x04, 0x0010}, {0xaa, 0x05, 0x0008}, {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x1c, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x08, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x81, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,81,cc */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1b, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x42, ZC3XX_R01D_HSYNC_0}, /* 00,1d,42,cc */ {0xa0, 0x6f, ZC3XX_R01E_HSYNC_1}, /* 00,1e,6f,cc */ {0xa0, 0xaf, ZC3XX_R01F_HSYNC_2}, /* 00,1f,af,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; static const struct usb_action pas202b_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 00,88,21,cc */ {0xaa, 0x20, 0x0002}, /* 00,20,02,aa */ {0xaa, 0x21, 0x0006}, {0xaa, 0x03, 0x0040}, /* 00,03,40,aa */ {0xaa, 0x04, 0x0008}, /* 00,04,08,aa */ {0xaa, 0x05, 0x0006}, {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x02, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x06, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x01, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x40, ZC3XX_R01D_HSYNC_0}, /* 00,1d,40,cc */ {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, /* 00,1e,60,cc */ {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, /* 00,1f,90,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; static const struct usb_action pas202b_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xa0, 0x20, ZC3XX_R087_EXPTIMEMID}, /* 00,87,20,cc */ {0xa0, 0x21, ZC3XX_R088_EXPTIMELOW}, /* 
00,88,21,cc */ {0xaa, 0x20, 0x0004}, {0xaa, 0x21, 0x000c}, {0xaa, 0x03, 0x0040}, /* 00,03,40,aa */ {0xaa, 0x04, 0x0010}, {0xaa, 0x05, 0x000c}, {0xaa, 0x0e, 0x0001}, /* 00,0e,01,aa */ {0xaa, 0x0f, 0x0000}, /* 00,0f,00,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x0c, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x02, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,02,cc */ {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, /* 01,8c,10,cc */ {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,20,cc */ {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x40, ZC3XX_R01D_HSYNC_0}, /* 00,1d,40,cc */ {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, /* 00,1e,60,cc */ {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, /* 00,1f,90,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x0f, ZC3XX_R087_EXPTIMEMID}, /* 00,87,0f,cc */ {0xa0, 0x0e, ZC3XX_R088_EXPTIMELOW}, /* 00,88,0e,cc */ {} }; /* mt9v111 (mi0360soc) and pb0330 from vm30x.inf 0ac8:301b 07/02/13 */ static const struct usb_action mt9v111_1_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0001}, {0xaa, 0x06, 0x0000}, {0xaa, 0x08, 0x0483}, {0xaa, 0x01, 0x0004}, {0xaa, 0x08, 0x0006}, {0xaa, 0x02, 0x0011}, {0xaa, 0x03, 0x01e5}, /*jfm: was 01e7*/ {0xaa, 0x04, 0x0285}, /*jfm: was 0287*/ {0xaa, 0x07, 0x3002}, {0xaa, 0x20, 0x5100}, {0xaa, 0x35, 0x507f}, {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xaa, 0x2b, 0x007f}, {0xaa, 0x2c, 0x007f}, /*jfm: was 0030*/ {0xaa, 0x2d, 0x007f}, /*jfm: was 0030*/ {0xaa, 0x2e, 0x007f}, /*jfm: was 0030*/ {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x09, 0x01ad}, /*jfm: was 00*/ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x6c, ZC3XX_R18D_YTARGET}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, {0xa0, 0x65, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action mt9v111_1_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, 
ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0001}, {0xaa, 0x06, 0x0000}, {0xaa, 0x08, 0x0483}, {0xaa, 0x01, 0x0004}, {0xaa, 0x08, 0x0006}, {0xaa, 0x02, 0x0011}, {0xaa, 0x03, 0x01e7}, {0xaa, 0x04, 0x0287}, {0xaa, 0x07, 0x3002}, {0xaa, 0x20, 0x5100}, {0xaa, 0x35, 0x007f}, /*jfm: was 0050*/ {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xaa, 0x2b, 0x007f}, /*jfm: was 28*/ {0xaa, 0x2c, 0x007f}, /*jfm: was 30*/ {0xaa, 0x2d, 0x007f}, /*jfm: was 30*/ {0xaa, 0x2e, 0x007f}, /*jfm: was 28*/ {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x09, 0x01ad}, /*jfm: was 00*/ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x6c, ZC3XX_R18D_YTARGET}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, {0xa0, 0x65, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action mt9v111_1_AE50HZ[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0562}, {0xbb, 0x01, 0x09aa}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x03, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x9b, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x47, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_1_AE50HZScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0509}, {0xbb, 0x01, 0x0934}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xd2, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x9a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_1_AE60HZ[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x003d}, {0xaa, 0x09, 0x016e}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xdd, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3d, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, 
ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_1_AE60HZScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0509}, {0xbb, 0x01, 0x0983}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x8f, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x81, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_1_AENoFlicker[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0509}, {0xbb, 0x01, 0x0960}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x09, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x40, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_1_AENoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0534}, {0xbb, 0x02, 0x0960}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x34, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; /* from usbvm303.inf 0ac8:303b 07/03/25 (3 - tas5130c) */ static const struct usb_action mt9v111_3_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0001}, /* select IFP/SOC registers */ {0xaa, 0x06, 0x0000}, /* operating mode control */ {0xaa, 0x08, 0x0483}, /* output format control */ /* H red first, V red or blue first, * raw 
Bayer, auto flicker */ {0xaa, 0x01, 0x0004}, /* select sensor core registers */ {0xaa, 0x08, 0x0006}, /* row start */ {0xaa, 0x02, 0x0011}, /* column start */ {0xaa, 0x03, 0x01e5}, /* window height - 1 */ {0xaa, 0x04, 0x0285}, /* window width - 1 */ {0xaa, 0x07, 0x3002}, /* output control */ {0xaa, 0x20, 0x1100}, /* read mode: bits 8 & 12 (?) */ {0xaa, 0x35, 0x007f}, /* global gain */ {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xaa, 0x2b, 0x007f}, /* green1 gain */ {0xaa, 0x2c, 0x007f}, /* blue gain */ {0xaa, 0x2d, 0x007f}, /* red gain */ {0xaa, 0x2e, 0x007f}, /* green2 gain */ {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x80, ZC3XX_R18D_YTARGET}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, {0xa0, 0x65, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action mt9v111_3_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xdc, ZC3XX_R08B_I2CDEVICEADDR}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0001}, {0xaa, 0x06, 0x0000}, {0xaa, 0x08, 0x0483}, {0xaa, 0x01, 0x0004}, {0xaa, 0x08, 0x0006}, {0xaa, 0x02, 0x0011}, {0xaa, 0x03, 0x01e7}, {0xaa, 0x04, 0x0287}, {0xaa, 0x07, 0x3002}, {0xaa, 0x20, 0x1100}, {0xaa, 0x35, 0x007f}, {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xaa, 0x2b, 0x007f}, {0xaa, 0x2c, 0x007f}, {0xaa, 0x2d, 0x007f}, {0xaa, 0x2e, 0x007f}, {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x80, ZC3XX_R18D_YTARGET}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, {0xa0, 0x65, ZC3XX_R118_BGAIN}, {} }; static const struct usb_action mt9v111_3_AE50HZ[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0009}, /* horizontal blanking */ {0xaa, 0x09, 0x01ce}, /* shutter width */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xd2, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x9a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, 
ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_3_AE50HZScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0009}, {0xaa, 0x09, 0x01ce}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xd2, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x9a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_3_AE60HZ[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0009}, {0xaa, 0x09, 0x0083}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x8f, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x81, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_3_AE60HZScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0009}, {0xaa, 0x09, 0x0083}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x8f, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x81, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf9, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_3_AENoFlicker[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0034}, {0xaa, 0x09, 0x0260}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x34, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action mt9v111_3_AENoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R180_AUTOCORRECTENABLE}, {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xaa, 0x05, 0x0034}, {0xaa, 0x09, 0x0260}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 
0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1c, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x34, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, {} }; static const struct usb_action pb0330_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0006}, {0xaa, 0x02, 0x0011}, {0xaa, 0x03, 0x01e5}, /*jfm: was 1e7*/ {0xaa, 0x04, 0x0285}, /*jfm: was 0287*/ {0xaa, 0x06, 0x0003}, {0xaa, 0x07, 0x3002}, {0xaa, 0x20, 0x1100}, {0xaa, 0x2f, 0xf7b0}, {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x34, 0x0100}, {0xaa, 0x35, 0x0060}, {0xaa, 0x3d, 0x068f}, {0xaa, 0x40, 0x01e0}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x09, 0x01ad}, /*jfm: was 00 */ {0xa0, 0x15, 0x01ae}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, /*jfm: was 6c*/ {} }; static const struct usb_action pb0330_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00 */ {0xa0, 0x0a, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x07, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, {0xdd, 0x00, 0x0200}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xaa, 0x01, 0x0006}, {0xaa, 0x02, 0x0011}, {0xaa, 0x03, 0x01e7}, {0xaa, 0x04, 0x0287}, {0xaa, 0x06, 0x0003}, {0xaa, 0x07, 0x3002}, {0xaa, 0x20, 0x1100}, {0xaa, 0x2f, 0xf7b0}, {0xaa, 0x30, 0x0005}, {0xaa, 0x31, 0x0000}, {0xaa, 0x34, 0x0100}, {0xaa, 0x35, 0x0060}, {0xaa, 0x3d, 0x068f}, {0xaa, 0x40, 0x01e0}, {0xaa, 0x58, 0x0078}, {0xaa, 0x62, 0x0411}, {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x09, 0x01ad}, {0xa0, 
0x15, 0x01ae}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x78, ZC3XX_R18D_YTARGET}, /*jfm: was 6c*/ {} }; static const struct usb_action pb0330_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x055c}, {0xbb, 0x01, 0x09aa}, {0xbb, 0x00, 0x1001}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xc4, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x47, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1a, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x5c, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action pb0330_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0566}, {0xbb, 0x02, 0x09b2}, {0xbb, 0x00, 0x1002}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x8c, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x8a, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1a, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd7, ZC3XX_R01D_HSYNC_0}, {0xa0, 0xf0, ZC3XX_R01E_HSYNC_1}, {0xa0, 0xf8, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action pb0330_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0535}, {0xbb, 0x01, 0x0974}, {0xbb, 0x00, 0x1001}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xfe, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x3e, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1a, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x35, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x50, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xd0, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action pb0330_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0535}, {0xbb, 0x02, 0x096c}, {0xbb, 0x00, 0x1002}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xc0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x7c, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x1a, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x14, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x66, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x35, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x50, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xd0, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action pb0330_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0509}, {0xbb, 0x02, 0x0940}, {0xbb, 0x00, 0x1002}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, 
ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x01, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x09, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x40, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {} }; static const struct usb_action pb0330_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, {0xbb, 0x00, 0x0535}, {0xbb, 0x01, 0x0980}, {0xbb, 0x00, 0x1001}, {0xa0, 0x60, ZC3XX_R11D_GLOBALGAIN}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xf0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x01, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x10, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x20, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x35, ZC3XX_R01D_HSYNC_0}, {0xa0, 0x60, ZC3XX_R01E_HSYNC_1}, {0xa0, 0x90, ZC3XX_R01F_HSYNC_2}, {0xa0, 0xe0, ZC3XX_R020_HSYNC_3}, {} }; /* from oem9.inf */ static const struct usb_action po2030_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x04, ZC3XX_R002_CLOCKSELECT}, /* 00,02,04,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x04, ZC3XX_R080_HBLANKHIGH}, /* 00,80,04,cc */ {0xa0, 0x05, ZC3XX_R081_HBLANKLOW}, /* 00,81,05,cc */ {0xa0, 0x16, ZC3XX_R083_RGAINADDR}, /* 00,83,16,cc */ {0xa0, 0x18, ZC3XX_R085_BGAINADDR}, /* 00,85,18,cc */ {0xa0, 0x1a, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,1a,cc */ {0xa0, 0x1b, ZC3XX_R087_EXPTIMEMID}, /* 00,87,1b,cc */ {0xa0, 0x1c, ZC3XX_R088_EXPTIMELOW}, /* 00,88,1c,cc */ {0xa0, 0xee, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,ee,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ {0xaa, 0x8d, 0x0008}, /* 00,8d,08,aa */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e6,cc */ {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc */ {0xaa, 0x09, 0x00ce}, /* 00,09,ce,aa */ {0xaa, 0x0b, 0x0005}, /* 00,0b,05,aa */ {0xaa, 0x0d, 0x0054}, /* 00,0d,54,aa */ {0xaa, 0x0f, 0x00eb}, /* 00,0f,eb,aa */ {0xaa, 0x87, 0x0000}, /* 00,87,00,aa */ {0xaa, 0x88, 0x0004}, /* 00,88,04,aa */ {0xaa, 0x89, 0x0000}, /* 00,89,00,aa */ {0xaa, 0x8a, 0x0005}, /* 00,8a,05,aa */ {0xaa, 0x13, 0x0003}, /* 00,13,03,aa */ {0xaa, 0x16, 0x0040}, /* 00,16,40,aa */ {0xaa, 0x18, 0x0040}, /* 00,18,40,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x29, 0x00e8}, /* 00,29,e8,aa */ {0xaa, 0x45, 0x0045}, /* 00,45,45,aa */ {0xaa, 0x50, 0x00ed}, /* 00,50,ed,aa */ {0xaa, 0x51, 0x0025}, /* 00,51,25,aa */ {0xaa, 0x52, 0x0042}, /* 00,52,42,aa */ {0xaa, 0x53, 0x002f}, /* 00,53,2f,aa */ {0xaa, 0x79, 
0x0025}, /* 00,79,25,aa */ {0xaa, 0x7b, 0x0000}, /* 00,7b,00,aa */ {0xaa, 0x7e, 0x0025}, /* 00,7e,25,aa */ {0xaa, 0x7f, 0x0025}, /* 00,7f,25,aa */ {0xaa, 0x21, 0x0000}, /* 00,21,00,aa */ {0xaa, 0x33, 0x0036}, /* 00,33,36,aa */ {0xaa, 0x36, 0x0060}, /* 00,36,60,aa */ {0xaa, 0x37, 0x0008}, /* 00,37,08,aa */ {0xaa, 0x3b, 0x0031}, /* 00,3b,31,aa */ {0xaa, 0x44, 0x000f}, /* 00,44,0f,aa */ {0xaa, 0x58, 0x0002}, /* 00,58,02,aa */ {0xaa, 0x66, 0x00c0}, /* 00,66,c0,aa */ {0xaa, 0x67, 0x0044}, /* 00,67,44,aa */ {0xaa, 0x6b, 0x00a0}, /* 00,6b,a0,aa */ {0xaa, 0x6c, 0x0054}, /* 00,6c,54,aa */ {0xaa, 0xd6, 0x0007}, /* 00,d6,07,aa */ {0xa0, 0xf7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,f7,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x7a, ZC3XX_R116_RGAIN}, /* 01,16,7a,cc */ {0xa0, 0x4a, ZC3XX_R118_BGAIN}, /* 01,18,4a,cc */ {} }; /* from oem9.inf */ static const struct usb_action po2030_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, /* 00,02,10,cc */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc */ {0xa0, 0x04, ZC3XX_R080_HBLANKHIGH}, /* 00,80,04,cc */ {0xa0, 0x05, ZC3XX_R081_HBLANKLOW}, /* 00,81,05,cc */ {0xa0, 0x16, ZC3XX_R083_RGAINADDR}, /* 00,83,16,cc */ {0xa0, 0x18, ZC3XX_R085_BGAINADDR}, /* 00,85,18,cc */ {0xa0, 0x1a, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,1a,cc */ {0xa0, 0x1b, ZC3XX_R087_EXPTIMEMID}, /* 00,87,1b,cc */ {0xa0, 0x1c, ZC3XX_R088_EXPTIMELOW}, /* 00,88,1c,cc */ {0xa0, 0xee, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,ee,cc */ {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, /* 00,08,03,cc */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc */ {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ {0xaa, 0x8d, 0x0008}, /* 00,8d,08,aa */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc */ {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e8,cc */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc */ {0xaa, 0x09, 0x00cc}, /* 00,09,cc,aa */ {0xaa, 0x0b, 0x0005}, /* 00,0b,05,aa */ {0xaa, 0x0d, 0x0058}, /* 00,0d,58,aa */ {0xaa, 0x0f, 0x00ed}, /* 00,0f,ed,aa */ {0xaa, 0x87, 0x0000}, /* 00,87,00,aa */ {0xaa, 0x88, 0x0004}, /* 00,88,04,aa */ {0xaa, 0x89, 0x0000}, /* 00,89,00,aa */ {0xaa, 0x8a, 0x0005}, /* 00,8a,05,aa */ {0xaa, 0x13, 0x0003}, /* 00,13,03,aa */ {0xaa, 0x16, 0x0040}, /* 00,16,40,aa */ {0xaa, 0x18, 0x0040}, /* 00,18,40,aa */ {0xaa, 0x1d, 0x0002}, /* 00,1d,02,aa */ {0xaa, 0x29, 0x00e8}, /* 00,29,e8,aa */ {0xaa, 0x45, 0x0045}, /* 00,45,45,aa */ {0xaa, 0x50, 0x00ed}, /* 00,50,ed,aa */ {0xaa, 0x51, 0x0025}, /* 00,51,25,aa */ {0xaa, 0x52, 0x0042}, /* 00,52,42,aa */ {0xaa, 0x53, 
0x002f}, /* 00,53,2f,aa */ {0xaa, 0x79, 0x0025}, /* 00,79,25,aa */ {0xaa, 0x7b, 0x0000}, /* 00,7b,00,aa */ {0xaa, 0x7e, 0x0025}, /* 00,7e,25,aa */ {0xaa, 0x7f, 0x0025}, /* 00,7f,25,aa */ {0xaa, 0x21, 0x0000}, /* 00,21,00,aa */ {0xaa, 0x33, 0x0036}, /* 00,33,36,aa */ {0xaa, 0x36, 0x0060}, /* 00,36,60,aa */ {0xaa, 0x37, 0x0008}, /* 00,37,08,aa */ {0xaa, 0x3b, 0x0031}, /* 00,3b,31,aa */ {0xaa, 0x44, 0x000f}, /* 00,44,0f,aa */ {0xaa, 0x58, 0x0002}, /* 00,58,02,aa */ {0xaa, 0x66, 0x00c0}, /* 00,66,c0,aa */ {0xaa, 0x67, 0x0044}, /* 00,67,44,aa */ {0xaa, 0x6b, 0x00a0}, /* 00,6b,a0,aa */ {0xaa, 0x6c, 0x0054}, /* 00,6c,54,aa */ {0xaa, 0xd6, 0x0007}, /* 00,d6,07,aa */ {0xa0, 0xf7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,f7,cc */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc */ {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, /* 01,89,06,cc */ {0xa0, 0x00, 0x01ad}, /* 01,ad,00,cc */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc */ {0xa0, 0x7a, ZC3XX_R116_RGAIN}, /* 01,16,7a,cc */ {0xa0, 0x4a, ZC3XX_R118_BGAIN}, /* 01,18,4a,cc */ {} }; static const struct usb_action po2030_50HZ[] = { {0xaa, 0x8d, 0x0008}, /* 00,8d,08,aa */ {0xaa, 0x1a, 0x0001}, /* 00,1a,01,aa */ {0xaa, 0x1b, 0x000a}, /* 00,1b,0a,aa */ {0xaa, 0x1c, 0x00b0}, /* 00,1c,b0,aa */ {0xa0, 0x05, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,05,cc */ {0xa0, 0x35, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,35,cc */ {0xa0, 0x70, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,70,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x85, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,85,cc */ {0xa0, 0x58, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,58,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0c,cc */ {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,18,cc */ {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,60,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x22, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,22,cc */ {0xa0, 0x88, ZC3XX_R18D_YTARGET}, /* 01,8d,88,cc */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ {} }; static const struct usb_action po2030_60HZ[] = { {0xaa, 0x8d, 0x0008}, /* 00,8d,08,aa */ {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa */ {0xaa, 0x1b, 0x00de}, /* 00,1b,de,aa */ {0xaa, 0x1c, 0x0040}, /* 00,1c,40,aa */ {0xa0, 0x08, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,08,cc */ {0xa0, 0xae, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,ae,cc */ {0xa0, 0x80, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,80,cc */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x6f, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,6f,cc */ {0xa0, 0x20, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,20,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0c,cc */ {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,18,cc */ {0xa0, 0x60, ZC3XX_R1A8_DIGITALGAIN}, /* 01,a8,60,cc */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc */ {0xa0, 0x22, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,22,cc */ {0xa0, 0x88, ZC3XX_R18D_YTARGET}, /* 01,8d,88,cc */ /* win: 01,8d,80 */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc */ {} }; static const struct usb_action po2030_NoFlicker[] = { {0xa0, 0x02, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,02,cc */ {0xaa, 0x8d, 0x000d}, /* 00,8d,0d,aa */ {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa */ 
{0xaa, 0x1b, 0x0002}, /* 00,1b,02,aa */ {0xaa, 0x1c, 0x0078}, /* 00,1c,78,aa */ {0xaa, 0x46, 0x0000}, /* 00,46,00,aa */ {0xaa, 0x15, 0x0000}, /* 00,15,00,aa */ {} }; static const struct usb_action tas5130c_InitialScale[] = { /* 320x240 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x50, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x03, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x02, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x00, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x04, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x0f, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x04, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x0f, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x06, ZC3XX_R08D_COMPABILITYMODE}, {0xa0, 0xf7, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x70, ZC3XX_R18D_YTARGET}, {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x07, ZC3XX_R0A5_EXPOSUREGAIN}, {0xa0, 0x02, ZC3XX_R0A6_EXPOSUREBLACKLVL}, {} }; static const struct usb_action tas5130c_Initial[] = { /* 640x480 */ {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, {0xa0, 0x40, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x00, ZC3XX_R008_CLOCKSETTING}, {0xa0, 0x02, ZC3XX_R010_CMOSSENSORSELECT}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x00, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, {0xa0, 0x05, ZC3XX_R098_WINYSTARTLOW}, {0xa0, 0x0f, ZC3XX_R09A_WINXSTARTLOW}, {0xa0, 0x05, ZC3XX_R11A_FIRSTYLOW}, {0xa0, 0x0f, ZC3XX_R11C_FIRSTXLOW}, {0xa0, 0xe6, ZC3XX_R09C_WINHEIGHTLOW}, {0xa0, 0x02, ZC3XX_R09D_WINWIDTHHIGH}, {0xa0, 0x86, ZC3XX_R09E_WINWIDTHLOW}, {0xa0, 0x06, ZC3XX_R08D_COMPABILITYMODE}, {0xa0, 0x37, ZC3XX_R101_SENSORCORRECTION}, {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, {0xa0, 0x06, ZC3XX_R189_AWBSTATUS}, {0xa0, 0x70, ZC3XX_R18D_YTARGET}, {0xa0, 0x50, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x00, 0x01ad}, {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, {0xa0, 0x07, ZC3XX_R0A5_EXPOSUREGAIN}, {0xa0, 0x02, ZC3XX_R0A6_EXPOSUREBLACKLVL}, {} }; static const struct usb_action tas5130c_50HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0063}, /* 00,a4,63,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x63, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,63,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x04, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xfe, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x47, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,47,cc */ 
{0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x08, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xd3, ZC3XX_R01D_HSYNC_0}, /* 00,1d,d3,cc */ {0xa0, 0xda, ZC3XX_R01E_HSYNC_1}, /* 00,1e,da,cc */ {0xa0, 0xea, ZC3XX_R01F_HSYNC_2}, /* 00,1f,ea,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x03, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,03,cc */ {0xa0, 0x4c, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; static const struct usb_action tas5130c_50HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0077}, /* 00,a4,77,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x77, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,77,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x07, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xd0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x7d, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,7d,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x08, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xf0, ZC3XX_R01D_HSYNC_0}, /* 00,1d,f0,cc */ {0xa0, 0xf4, ZC3XX_R01E_HSYNC_1}, /* 00,1e,f4,cc */ {0xa0, 0xf8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,f8,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x03, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,03,cc */ {0xa0, 0xc0, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; static const struct usb_action tas5130c_60HZ[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0036}, /* 00,a4,36,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x36, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,36,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x05, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x54, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x3e, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,3e,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x08, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xca, ZC3XX_R01D_HSYNC_0}, /* 00,1d,ca,cc */ {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d0,cc */ {0xa0, 0xe0, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e0,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x03, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,03,cc */ {0xa0, 0x28, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; static const struct usb_action tas5130c_60HZScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0077}, /* 00,a4,77,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x77, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,77,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x09, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x47, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc */ {0xa0, 0x7d, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,7d,cc */ {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x08, ZC3XX_R1A9_DIGITALLIMITDIFF}, {0xa0, 0x24, 
ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0xc8, ZC3XX_R01D_HSYNC_0}, /* 00,1d,c8,cc */ {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d0,cc */ {0xa0, 0xe0, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e0,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x03, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,03,cc */ {0xa0, 0x20, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; static const struct usb_action tas5130c_NoFlicker[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0040}, /* 00,a4,40,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x40, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,40,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x05, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0xa0, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0xbc, ZC3XX_R01D_HSYNC_0}, /* 00,1d,bc,cc */ {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d0,cc */ {0xa0, 0xe0, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e0,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x02, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,02,cc */ {0xa0, 0xf0, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; static const struct usb_action tas5130c_NoFlickerScale[] = { {0xa0, 0x00, ZC3XX_R019_AUTOADJUSTFPS}, /* 00,19,00,cc */ {0xaa, 0xa3, 0x0001}, /* 00,a3,01,aa */ {0xaa, 0xa4, 0x0090}, /* 00,a4,90,aa */ {0xa0, 0x01, ZC3XX_R0A3_EXPOSURETIMEHIGH}, /* 00,a3,01,cc */ {0xa0, 0x90, ZC3XX_R0A4_EXPOSURETIMELOW}, /* 00,a4,90,cc */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc */ {0xa0, 0x0a, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x00, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, {0xa0, 0x04, ZC3XX_R197_ANTIFLICKERLOW}, {0xa0, 0x0c, ZC3XX_R18C_AEFREEZE}, {0xa0, 0x18, ZC3XX_R18F_AEUNFREEZE}, {0xa0, 0x00, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,00,cc */ {0xa0, 0x00, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,00,cc */ {0xa0, 0xbc, ZC3XX_R01D_HSYNC_0}, /* 00,1d,bc,cc */ {0xa0, 0xd0, ZC3XX_R01E_HSYNC_1}, /* 00,1e,d0,cc */ {0xa0, 0xe0, ZC3XX_R01F_HSYNC_2}, /* 00,1f,e0,cc */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc */ {0xa0, 0x02, ZC3XX_R09F_MAXXHIGH}, /* 00,9f,02,cc */ {0xa0, 0xf0, ZC3XX_R0A0_MAXXLOW}, {0xa0, 0x50, ZC3XX_R11D_GLOBALGAIN}, {} }; /* from usbvm305.inf 0ac8:305b 07/06/15 (3 - tas5130c) */ static const struct usb_action gc0303_Initial[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc, */ {0xa0, 0x02, ZC3XX_R008_CLOCKSETTING}, /* 00,08,02,cc, */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc, */ {0xa0, 0x00, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc, */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc, */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc, */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc, */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc, */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc, */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc, */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc, */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc, */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc, */ {0xa0, 0x00, 
ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc, */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc, */ {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e6,cc, * 6<->8 */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,86,cc, * 6<->8 */ {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, /* 00,87,10,cc, */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc, */ {0xaa, 0x01, 0x0000}, {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa, */ {0xaa, 0x1c, 0x0017}, /* 00,1c,17,aa, */ {0xaa, 0x1b, 0x0000}, {0xa0, 0x82, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,82,cc, */ {0xa0, 0x83, ZC3XX_R087_EXPTIMEMID}, /* 00,87,83,cc, */ {0xa0, 0x84, ZC3XX_R088_EXPTIMELOW}, /* 00,88,84,cc, */ {0xaa, 0x05, 0x0010}, /* 00,05,10,aa, */ {0xaa, 0x0a, 0x0002}, {0xaa, 0x0b, 0x0000}, {0xaa, 0x0c, 0x0002}, {0xaa, 0x0d, 0x0000}, {0xaa, 0x0e, 0x0002}, {0xaa, 0x0f, 0x0000}, {0xaa, 0x10, 0x0002}, {0xaa, 0x11, 0x0000}, {0xaa, 0x16, 0x0001}, /* 00,16,01,aa, */ {0xaa, 0x17, 0x00e8}, /* 00,17,e6,aa, (e6 -> e8) */ {0xaa, 0x18, 0x0002}, /* 00,18,02,aa, */ {0xaa, 0x19, 0x0088}, /* 00,19,86,aa, */ {0xaa, 0x20, 0x0020}, /* 00,20,20,aa, */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,b7,cc, */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc, */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc, */ {0xa0, 0x76, ZC3XX_R189_AWBSTATUS}, /* 01,89,76,cc, */ {0xa0, 0x09, 0x01ad}, /* 01,ad,09,cc, */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc, */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc, */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc, */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc, */ {0xa0, 0x58, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, /* 01,16,61,cc, */ {0xa0, 0x65, ZC3XX_R118_BGAIN}, /* 01,18,65,cc */ {0xaa, 0x1b, 0x0000}, {} }; static const struct usb_action gc0303_InitialScale[] = { {0xa0, 0x01, ZC3XX_R000_SYSTEMCONTROL}, /* 00,00,01,cc, */ {0xa0, 0x02, ZC3XX_R008_CLOCKSETTING}, /* 00,08,02,cc, */ {0xa0, 0x01, ZC3XX_R010_CMOSSENSORSELECT}, /* 00,10,01,cc, */ {0xa0, 0x10, ZC3XX_R002_CLOCKSELECT}, {0xa0, 0x02, ZC3XX_R003_FRAMEWIDTHHIGH}, /* 00,03,02,cc, */ {0xa0, 0x80, ZC3XX_R004_FRAMEWIDTHLOW}, /* 00,04,80,cc, */ {0xa0, 0x01, ZC3XX_R005_FRAMEHEIGHTHIGH}, /* 00,05,01,cc, */ {0xa0, 0xe0, ZC3XX_R006_FRAMEHEIGHTLOW}, /* 00,06,e0,cc, */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc, */ {0xa0, 0x01, ZC3XX_R001_SYSTEMOPERATING}, /* 00,01,01,cc, */ {0xa0, 0x03, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,03,cc, */ {0xa0, 0x01, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,01,cc, */ {0xa0, 0x00, ZC3XX_R098_WINYSTARTLOW}, /* 00,98,00,cc, */ {0xa0, 0x00, ZC3XX_R09A_WINXSTARTLOW}, /* 00,9a,00,cc, */ {0xa0, 0x00, ZC3XX_R11A_FIRSTYLOW}, /* 01,1a,00,cc, */ {0xa0, 0x00, ZC3XX_R11C_FIRSTXLOW}, /* 01,1c,00,cc, */ {0xa0, 0xe8, ZC3XX_R09C_WINHEIGHTLOW}, /* 00,9c,e8,cc, * 8<->6 */ {0xa0, 0x88, ZC3XX_R09E_WINWIDTHLOW}, /* 00,9e,88,cc, * 8<->6 */ {0xa0, 0x10, ZC3XX_R087_EXPTIMEMID}, /* 00,87,10,cc, */ {0xa0, 0x98, ZC3XX_R08B_I2CDEVICEADDR}, /* 00,8b,98,cc, */ {0xaa, 0x01, 0x0000}, {0xaa, 0x1a, 0x0000}, /* 00,1a,00,aa, */ {0xaa, 0x1c, 0x0017}, /* 00,1c,17,aa, */ {0xaa, 0x1b, 0x0000}, {0xa0, 0x82, ZC3XX_R086_EXPTIMEHIGH}, /* 00,86,82,cc, */ {0xa0, 0x83, ZC3XX_R087_EXPTIMEMID}, /* 00,87,83,cc, */ {0xa0, 0x84, ZC3XX_R088_EXPTIMELOW}, /* 00,88,84,cc, */ {0xaa, 0x05, 0x0010}, /* 00,05,10,aa, */ {0xaa, 0x0a, 0x0001}, {0xaa, 0x0b, 0x0000}, {0xaa, 0x0c, 0x0001}, {0xaa, 0x0d, 0x0000}, {0xaa, 0x0e, 0x0001}, {0xaa, 0x0f, 0x0000}, {0xaa, 0x10, 0x0001}, {0xaa, 0x11, 0x0000}, {0xaa, 0x16, 0x0001}, /* 00,16,01,aa, */ {0xaa, 0x17, 
0x00e8}, /* 00,17,e6,aa (e6 -> e8) */ {0xaa, 0x18, 0x0002}, /* 00,18,02,aa, */ {0xaa, 0x19, 0x0088}, /* 00,19,88,aa, */ {0xa0, 0xb7, ZC3XX_R101_SENSORCORRECTION}, /* 01,01,b7,cc, */ {0xa0, 0x05, ZC3XX_R012_VIDEOCONTROLFUNC}, /* 00,12,05,cc, */ {0xa0, 0x0d, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0d,cc, */ {0xa0, 0x76, ZC3XX_R189_AWBSTATUS}, /* 01,89,76,cc, */ {0xa0, 0x09, 0x01ad}, /* 01,ad,09,cc, */ {0xa0, 0x03, ZC3XX_R1C5_SHARPNESSMODE}, /* 01,c5,03,cc, */ {0xa0, 0x13, ZC3XX_R1CB_SHARPNESS05}, /* 01,cb,13,cc, */ {0xa0, 0x08, ZC3XX_R250_DEADPIXELSMODE}, /* 02,50,08,cc, */ {0xa0, 0x08, ZC3XX_R301_EEPROMACCESS}, /* 03,01,08,cc, */ {0xa0, 0x58, ZC3XX_R1A8_DIGITALGAIN}, {0xa0, 0x61, ZC3XX_R116_RGAIN}, /* 01,16,61,cc, */ {0xa0, 0x65, ZC3XX_R118_BGAIN}, /* 01,18,65,cc */ {0xaa, 0x1b, 0x0000}, {} }; static const struct usb_action gc0303_50HZ[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0001}, /* 00,83,01,aa */ {0xaa, 0x84, 0x0063}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc, */ {0xa0, 0x06, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,0d,cc, */ {0xa0, 0xa8, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,50,cc, */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc, */ {0xa0, 0x47, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,47,cc, */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc, */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc, */ {0xa0, 0x48, ZC3XX_R1AA_DIGITALGAINSTEP}, {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc, */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc, */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc, */ {0xa0, 0x7f, ZC3XX_R18D_YTARGET}, {} }; static const struct usb_action gc0303_50HZScale[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0003}, /* 00,83,03,aa */ {0xaa, 0x84, 0x0054}, /* 00,84,54,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc, */ {0xa0, 0x0d, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,0d,cc, */ {0xa0, 0x50, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,50,cc, */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc, */ {0xa0, 0x8e, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,8e,cc, */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc, */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc, */ {0xa0, 0x48, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc, */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc, */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc, */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc, */ {0xa0, 0x7f, ZC3XX_R18D_YTARGET}, {} }; static const struct usb_action gc0303_60HZ[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, {0xaa, 0x84, 0x003b}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc, */ {0xa0, 0x05, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,91,05,cc, */ {0xa0, 0x88, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,92,88,cc, */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc, */ {0xa0, 0x3b, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,3b,cc, */ 
{0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc, */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,a9,10,cc, */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,aa,24,cc, */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc, */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc, */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc, */ {0xa0, 0x80, ZC3XX_R18D_YTARGET}, {} }; static const struct usb_action gc0303_60HZScale[] = { {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, {0xaa, 0x84, 0x0076}, {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc, */ {0xa0, 0x0b, ZC3XX_R191_EXPOSURELIMITMID}, /* 01,1,0b,cc, */ {0xa0, 0x10, ZC3XX_R192_EXPOSURELIMITLOW}, /* 01,2,10,cc, */ {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,5,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,6,00,cc, */ {0xa0, 0x76, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,7,76,cc, */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,f,15,cc, */ {0xa0, 0x10, ZC3XX_R1A9_DIGITALLIMITDIFF}, /* 01,9,10,cc, */ {0xa0, 0x24, ZC3XX_R1AA_DIGITALGAINSTEP}, /* 01,a,24,cc, */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,f,c8,cc, */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,0,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,d,58,cc, */ {0xa0, 0x42, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,42,cc, */ {0xa0, 0x80, ZC3XX_R18D_YTARGET}, {} }; static const struct usb_action gc0303_NoFlicker[] = { {0xa0, 0x0c, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0c,cc, */ {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, /* 00,83,00,aa */ {0xaa, 0x84, 0x0020}, /* 00,84,20,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,0,00,cc, */ {0xa0, 0x00, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x48, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc, */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc, */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc, */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc, */ {0xa0, 0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc, */ {0xa0, 0x03, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,03,cc */ {} }; static const struct usb_action gc0303_NoFlickerScale[] = { {0xa0, 0x0c, ZC3XX_R100_OPERATIONMODE}, /* 01,00,0c,cc, */ {0xaa, 0x82, 0x0000}, /* 00,82,00,aa */ {0xaa, 0x83, 0x0000}, /* 00,83,00,aa */ {0xaa, 0x84, 0x0020}, /* 00,84,20,aa */ {0xa0, 0x00, ZC3XX_R190_EXPOSURELIMITHIGH}, /* 01,90,00,cc, */ {0xa0, 0x00, ZC3XX_R191_EXPOSURELIMITMID}, {0xa0, 0x48, ZC3XX_R192_EXPOSURELIMITLOW}, {0xa0, 0x00, ZC3XX_R195_ANTIFLICKERHIGH}, /* 01,95,00,cc, */ {0xa0, 0x00, ZC3XX_R196_ANTIFLICKERMID}, /* 01,96,00,cc, */ {0xa0, 0x10, ZC3XX_R197_ANTIFLICKERLOW}, /* 01,97,10,cc, */ {0xa0, 0x0e, ZC3XX_R18C_AEFREEZE}, /* 01,8c,0e,cc, */ {0xa0, 0x15, ZC3XX_R18F_AEUNFREEZE}, /* 01,8f,15,cc, */ {0xa0, 0x62, ZC3XX_R01D_HSYNC_0}, /* 00,1d,62,cc, */ {0xa0, 0x90, ZC3XX_R01E_HSYNC_1}, /* 00,1e,90,cc, */ {0xa0, 0xc8, ZC3XX_R01F_HSYNC_2}, /* 00,1f,c8,cc, */ {0xa0, 
0xff, ZC3XX_R020_HSYNC_3}, /* 00,20,ff,cc, */ {0xa0, 0x58, ZC3XX_R11D_GLOBALGAIN}, /* 01,1d,58,cc, */ {0xa0, 0x03, ZC3XX_R180_AUTOCORRECTENABLE}, /* 01,80,03,cc */ {} }; static u8 reg_r(struct gspca_dev *gspca_dev, u16 index) { int ret; if (gspca_dev->usb_err < 0) return 0; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0xa1, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x01, /* value */ index, gspca_dev->usb_buf, 1, 500); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; return 0; } return gspca_dev->usb_buf[0]; } static void reg_w(struct gspca_dev *gspca_dev, u8 value, u16 index) { int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0xa0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); if (ret < 0) { pr_err("reg_w_i err %d\n", ret); gspca_dev->usb_err = ret; } } static u16 i2c_read(struct gspca_dev *gspca_dev, u8 reg) { u8 retbyte; u16 retval; if (gspca_dev->usb_err < 0) return 0; reg_w(gspca_dev, reg, 0x0092); reg_w(gspca_dev, 0x02, 0x0090); /* <- read command */ msleep(20); retbyte = reg_r(gspca_dev, 0x0091); /* read status */ if (retbyte != 0x00) pr_err("i2c_r status error %02x\n", retbyte); retval = reg_r(gspca_dev, 0x0095); /* read Lowbyte */ retval |= reg_r(gspca_dev, 0x0096) << 8; /* read Hightbyte */ return retval; } static u8 i2c_write(struct gspca_dev *gspca_dev, u8 reg, u8 valL, u8 valH) { u8 retbyte; if (gspca_dev->usb_err < 0) return 0; reg_w(gspca_dev, reg, 0x92); reg_w(gspca_dev, valL, 0x93); reg_w(gspca_dev, valH, 0x94); reg_w(gspca_dev, 0x01, 0x90); /* <- write command */ msleep(1); retbyte = reg_r(gspca_dev, 0x0091); /* read status */ if (retbyte != 0x00) pr_err("i2c_w status error %02x\n", retbyte); return retbyte; } static void usb_exchange(struct gspca_dev *gspca_dev, const struct usb_action *action) { while (action->req) { switch (action->req) { case 0xa0: /* write register */ reg_w(gspca_dev, action->val, action->idx); break; case 0xa1: /* read status */ reg_r(gspca_dev, action->idx); break; case 0xaa: i2c_write(gspca_dev, action->val, /* reg */ action->idx & 0xff, /* valL */ action->idx >> 8); /* valH */ break; case 0xbb: i2c_write(gspca_dev, action->idx >> 8, /* reg */ action->idx & 0xff, /* valL */ action->val); /* valH */ break; default: /* case 0xdd: * delay */ msleep(action->idx); break; } action++; msleep(1); } } static void setmatrix(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int i; const u8 *matrix; static const u8 adcm2700_matrix[9] = /* {0x66, 0xed, 0xed, 0xed, 0x66, 0xed, 0xed, 0xed, 0x66}; */ /*ms-win*/ {0x74, 0xed, 0xed, 0xed, 0x74, 0xed, 0xed, 0xed, 0x74}; static const u8 gc0305_matrix[9] = {0x50, 0xf8, 0xf8, 0xf8, 0x50, 0xf8, 0xf8, 0xf8, 0x50}; static const u8 ov7620_matrix[9] = {0x58, 0xf4, 0xf4, 0xf4, 0x58, 0xf4, 0xf4, 0xf4, 0x58}; static const u8 pas202b_matrix[9] = {0x4c, 0xf5, 0xff, 0xf9, 0x51, 0xf5, 0xfb, 0xed, 0x5f}; static const u8 po2030_matrix[9] = {0x60, 0xf0, 0xf0, 0xf0, 0x60, 0xf0, 0xf0, 0xf0, 0x60}; static const u8 tas5130c_matrix[9] = {0x68, 0xec, 0xec, 0xec, 0x68, 0xec, 0xec, 0xec, 0x68}; static const u8 gc0303_matrix[9] = {0x6c, 0xea, 0xea, 0xea, 0x6c, 0xea, 0xea, 0xea, 0x6c}; static const u8 *matrix_tb[SENSOR_MAX] = { [SENSOR_ADCM2700] = adcm2700_matrix, [SENSOR_CS2102] = ov7620_matrix, [SENSOR_CS2102K] = NULL, [SENSOR_GC0303] = gc0303_matrix, [SENSOR_GC0305] = gc0305_matrix, [SENSOR_HDCS2020] = NULL, [SENSOR_HV7131B] = NULL, [SENSOR_HV7131R] = 
po2030_matrix, [SENSOR_ICM105A] = po2030_matrix, [SENSOR_MC501CB] = NULL, [SENSOR_MT9V111_1] = gc0305_matrix, [SENSOR_MT9V111_3] = gc0305_matrix, [SENSOR_OV7620] = ov7620_matrix, [SENSOR_OV7630C] = NULL, [SENSOR_PAS106] = NULL, [SENSOR_PAS202B] = pas202b_matrix, [SENSOR_PB0330] = gc0305_matrix, [SENSOR_PO2030] = po2030_matrix, [SENSOR_TAS5130C] = tas5130c_matrix, }; matrix = matrix_tb[sd->sensor]; if (matrix == NULL) return; /* matrix already loaded */ for (i = 0; i < ARRAY_SIZE(ov7620_matrix); i++) reg_w(gspca_dev, matrix[i], 0x010a + i); } static void setsharpness(struct gspca_dev *gspca_dev, s32 val) { static const u8 sharpness_tb[][2] = { {0x02, 0x03}, {0x04, 0x07}, {0x08, 0x0f}, {0x10, 0x1e} }; reg_w(gspca_dev, sharpness_tb[val][0], 0x01c6); reg_r(gspca_dev, 0x01c8); reg_r(gspca_dev, 0x01c9); reg_r(gspca_dev, 0x01ca); reg_w(gspca_dev, sharpness_tb[val][1], 0x01cb); } static void setcontrast(struct gspca_dev *gspca_dev, s32 gamma, s32 brightness, s32 contrast) { const u8 *Tgamma; int g, i, adj, gp1, gp2; u8 gr[16]; static const u8 delta_b[16] = /* delta for brightness */ {0x50, 0x38, 0x2d, 0x28, 0x24, 0x21, 0x1e, 0x1d, 0x1d, 0x1b, 0x1b, 0x1b, 0x19, 0x18, 0x18, 0x18}; static const u8 delta_c[16] = /* delta for contrast */ {0x2c, 0x1a, 0x12, 0x0c, 0x0a, 0x06, 0x06, 0x06, 0x04, 0x06, 0x04, 0x04, 0x03, 0x03, 0x02, 0x02}; static const u8 gamma_tb[6][16] = { {0x00, 0x00, 0x03, 0x0d, 0x1b, 0x2e, 0x45, 0x5f, 0x79, 0x93, 0xab, 0xc1, 0xd4, 0xe5, 0xf3, 0xff}, {0x01, 0x0c, 0x1f, 0x3a, 0x53, 0x6d, 0x85, 0x9c, 0xb0, 0xc2, 0xd1, 0xde, 0xe9, 0xf2, 0xf9, 0xff}, {0x04, 0x16, 0x30, 0x4e, 0x68, 0x81, 0x98, 0xac, 0xbe, 0xcd, 0xda, 0xe4, 0xed, 0xf5, 0xfb, 0xff}, {0x13, 0x38, 0x59, 0x79, 0x92, 0xa7, 0xb9, 0xc8, 0xd4, 0xdf, 0xe7, 0xee, 0xf4, 0xf9, 0xfc, 0xff}, {0x20, 0x4b, 0x6e, 0x8d, 0xa3, 0xb5, 0xc5, 0xd2, 0xdc, 0xe5, 0xec, 0xf2, 0xf6, 0xfa, 0xfd, 0xff}, {0x24, 0x44, 0x64, 0x84, 0x9d, 0xb2, 0xc4, 0xd3, 0xe0, 0xeb, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xff}, }; Tgamma = gamma_tb[gamma - 1]; contrast -= 128; /* -128 / 127 */ brightness -= 128; /* -128 / 92 */ adj = 0; gp1 = gp2 = 0; for (i = 0; i < 16; i++) { g = Tgamma[i] + delta_b[i] * brightness / 256 - delta_c[i] * contrast / 256 - adj / 2; if (g > 0xff) g = 0xff; else if (g < 0) g = 0; reg_w(gspca_dev, g, 0x0120 + i); /* gamma */ if (contrast > 0) adj--; else if (contrast < 0) adj++; if (i > 1) gr[i - 1] = (g - gp2) / 2; else if (i != 0) gr[0] = gp1 == 0 ? 0 : (g - gp1); gp2 = gp1; gp1 = g; } gr[15] = (0xff - gp2) / 2; for (i = 0; i < 16; i++) reg_w(gspca_dev, gr[i], 0x0130 + i); /* gradient */ } static s32 getexposure(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->sensor) { case SENSOR_HV7131R: return (i2c_read(gspca_dev, 0x25) << 9) | (i2c_read(gspca_dev, 0x26) << 1) | (i2c_read(gspca_dev, 0x27) >> 7); case SENSOR_OV7620: return i2c_read(gspca_dev, 0x10); default: return -1; } } static void setexposure(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; switch (sd->sensor) { case SENSOR_HV7131R: i2c_write(gspca_dev, 0x25, val >> 9, 0x00); i2c_write(gspca_dev, 0x26, val >> 1, 0x00); i2c_write(gspca_dev, 0x27, val << 7, 0x00); break; case SENSOR_OV7620: i2c_write(gspca_dev, 0x10, val, 0x00); break; } } static void setquality(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; jpeg_set_qual(sd->jpeg_hdr, jpeg_qual[sd->reg08 >> 1]); reg_w(gspca_dev, sd->reg08, ZC3XX_R008_CLOCKSETTING); } /* Matches the sensor's internal frame rate to the lighting frequency. 
* Valid frequencies are: * 50Hz, for European and Asian lighting (default) * 60Hz, for American lighting * 0 = No Flicker (for outdoor usage) */ static void setlightfreq(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; int i, mode; const struct usb_action *zc3_freq; static const struct usb_action *freq_tb[SENSOR_MAX][6] = { [SENSOR_ADCM2700] = { adcm2700_NoFlicker, adcm2700_NoFlicker, adcm2700_50HZ, adcm2700_50HZ, adcm2700_60HZ, adcm2700_60HZ}, [SENSOR_CS2102] = { cs2102_NoFlicker, cs2102_NoFlickerScale, cs2102_50HZ, cs2102_50HZScale, cs2102_60HZ, cs2102_60HZScale}, [SENSOR_CS2102K] = { cs2102_NoFlicker, cs2102_NoFlickerScale, NULL, NULL, /* currently disabled */ NULL, NULL}, [SENSOR_GC0303] = { gc0303_NoFlicker, gc0303_NoFlickerScale, gc0303_50HZ, gc0303_50HZScale, gc0303_60HZ, gc0303_60HZScale}, [SENSOR_GC0305] = { gc0305_NoFlicker, gc0305_NoFlicker, gc0305_50HZ, gc0305_50HZ, gc0305_60HZ, gc0305_60HZ}, [SENSOR_HDCS2020] = { hdcs2020_NoFlicker, hdcs2020_NoFlicker, hdcs2020_50HZ, hdcs2020_50HZ, hdcs2020_60HZ, hdcs2020_60HZ}, [SENSOR_HV7131B] = { hv7131b_NoFlicker, hv7131b_NoFlickerScale, hv7131b_50HZ, hv7131b_50HZScale, hv7131b_60HZ, hv7131b_60HZScale}, [SENSOR_HV7131R] = { hv7131r_NoFlicker, hv7131r_NoFlickerScale, hv7131r_50HZ, hv7131r_50HZScale, hv7131r_60HZ, hv7131r_60HZScale}, [SENSOR_ICM105A] = { icm105a_NoFlicker, icm105a_NoFlickerScale, icm105a_50HZ, icm105a_50HZScale, icm105a_60HZ, icm105a_60HZScale}, [SENSOR_MC501CB] = { mc501cb_NoFlicker, mc501cb_NoFlickerScale, mc501cb_50HZ, mc501cb_50HZScale, mc501cb_60HZ, mc501cb_60HZScale}, [SENSOR_MT9V111_1] = { mt9v111_1_AENoFlicker, mt9v111_1_AENoFlickerScale, mt9v111_1_AE50HZ, mt9v111_1_AE50HZScale, mt9v111_1_AE60HZ, mt9v111_1_AE60HZScale}, [SENSOR_MT9V111_3] = { mt9v111_3_AENoFlicker, mt9v111_3_AENoFlickerScale, mt9v111_3_AE50HZ, mt9v111_3_AE50HZScale, mt9v111_3_AE60HZ, mt9v111_3_AE60HZScale}, [SENSOR_OV7620] = { ov7620_NoFlicker, ov7620_NoFlicker, ov7620_50HZ, ov7620_50HZ, ov7620_60HZ, ov7620_60HZ}, [SENSOR_OV7630C] = { NULL, NULL, NULL, NULL, NULL, NULL}, [SENSOR_PAS106] = { pas106b_NoFlicker, pas106b_NoFlicker, pas106b_50HZ, pas106b_50HZ, pas106b_60HZ, pas106b_60HZ}, [SENSOR_PAS202B] = { pas202b_NoFlicker, pas202b_NoFlickerScale, pas202b_50HZ, pas202b_50HZScale, pas202b_60HZ, pas202b_60HZScale}, [SENSOR_PB0330] = { pb0330_NoFlicker, pb0330_NoFlickerScale, pb0330_50HZ, pb0330_50HZScale, pb0330_60HZ, pb0330_60HZScale}, [SENSOR_PO2030] = { po2030_NoFlicker, po2030_NoFlicker, po2030_50HZ, po2030_50HZ, po2030_60HZ, po2030_60HZ}, [SENSOR_TAS5130C] = { tas5130c_NoFlicker, tas5130c_NoFlickerScale, tas5130c_50HZ, tas5130c_50HZScale, tas5130c_60HZ, tas5130c_60HZScale}, }; i = val * 2; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; if (mode) i++; /* 320x240 */ zc3_freq = freq_tb[sd->sensor][i]; if (zc3_freq == NULL) return; usb_exchange(gspca_dev, zc3_freq); switch (sd->sensor) { case SENSOR_GC0305: if (mode /* if 320x240 */ && val == 1) /* and 50Hz */ reg_w(gspca_dev, 0x85, 0x018d); /* win: 0x80, 0x018d */ break; case SENSOR_OV7620: if (!mode) { /* if 640x480 */ if (val != 0) /* and filter */ reg_w(gspca_dev, 0x40, 0x0002); else reg_w(gspca_dev, 0x44, 0x0002); } break; case SENSOR_PAS202B: reg_w(gspca_dev, 0x00, 0x01a7); break; } } static void setautogain(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; if (sd->sensor == SENSOR_OV7620) i2c_write(gspca_dev, 0x13, val ? 0xa3 : 0x80, 0x00); else reg_w(gspca_dev, val ? 
0x42 : 0x02, 0x0180); } /* * Update the transfer parameters. * This function is executed from a work queue. */ static void transfer_update(struct work_struct *work) { struct sd *sd = container_of(work, struct sd, work); struct gspca_dev *gspca_dev = &sd->gspca_dev; int change, good; u8 reg07, reg11; /* reg07 gets set to 0 by sd_start before starting us */ reg07 = 0; good = 0; while (1) { msleep(100); /* To protect gspca_dev->usb_buf and gspca_dev->usb_err */ mutex_lock(&gspca_dev->usb_lock); #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif if (!gspca_dev->present || !gspca_dev->streaming) break; /* Bit 0 of register 11 indicates FIFO overflow */ gspca_dev->usb_err = 0; reg11 = reg_r(gspca_dev, 0x0011); if (gspca_dev->usb_err) break; change = reg11 & 0x01; if (change) { /* overflow */ good = 0; if (reg07 == 0) /* Bit Rate Control not enabled? */ reg07 = 0x32; /* Allow 98 bytes / unit */ else if (reg07 > 2) reg07 -= 2; /* Decrease allowed bytes / unit */ else change = 0; } else { /* no overflow */ good++; if (good >= 10) { good = 0; if (reg07) { /* BRC enabled? */ change = 1; if (reg07 < 0x32) reg07 += 2; else reg07 = 0; } } } if (change) { gspca_dev->usb_err = 0; reg_w(gspca_dev, reg07, 0x0007); if (gspca_dev->usb_err) break; } mutex_unlock(&gspca_dev->usb_lock); } /* Something went wrong. Unlock and return */ mutex_unlock(&gspca_dev->usb_lock); } static void send_unknown(struct gspca_dev *gspca_dev, int sensor) { reg_w(gspca_dev, 0x01, 0x0000); /* bridge reset */ switch (sensor) { case SENSOR_PAS106: reg_w(gspca_dev, 0x03, 0x003a); reg_w(gspca_dev, 0x0c, 0x003b); reg_w(gspca_dev, 0x08, 0x0038); break; case SENSOR_ADCM2700: case SENSOR_GC0305: case SENSOR_OV7620: case SENSOR_MT9V111_1: case SENSOR_MT9V111_3: case SENSOR_PB0330: case SENSOR_PO2030: reg_w(gspca_dev, 0x0d, 0x003a); reg_w(gspca_dev, 0x02, 0x003b); reg_w(gspca_dev, 0x00, 0x0038); break; case SENSOR_HV7131R: case SENSOR_PAS202B: reg_w(gspca_dev, 0x03, 0x003b); reg_w(gspca_dev, 0x0c, 0x003a); reg_w(gspca_dev, 0x0b, 0x0039); if (sensor == SENSOR_PAS202B) reg_w(gspca_dev, 0x0b, 0x0038); break; } } /* start probe 2 wires */ static void start_2wr_probe(struct gspca_dev *gspca_dev, int sensor) { reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, sensor, 0x0010); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0x03, 0x0012); reg_w(gspca_dev, 0x01, 0x0012); /* msleep(2); */ } static int sif_probe(struct gspca_dev *gspca_dev) { u16 checkword; start_2wr_probe(gspca_dev, 0x0f); /* PAS106 */ reg_w(gspca_dev, 0x08, 0x008d); msleep(150); checkword = ((i2c_read(gspca_dev, 0x00) & 0x0f) << 4) | ((i2c_read(gspca_dev, 0x01) & 0xf0) >> 4); gspca_dbg(gspca_dev, D_PROBE, "probe sif 0x%04x\n", checkword); if (checkword == 0x0007) { send_unknown(gspca_dev, SENSOR_PAS106); return 0x0f; /* PAS106 */ } return -1; } static int vga_2wr_probe(struct gspca_dev *gspca_dev) { u16 retword; start_2wr_probe(gspca_dev, 0x00); /* HV7131B */ i2c_write(gspca_dev, 0x01, 0xaa, 0x00); retword = i2c_read(gspca_dev, 0x01); if (retword != 0) return 0x00; /* HV7131B */ start_2wr_probe(gspca_dev, 0x04); /* CS2102 */ i2c_write(gspca_dev, 0x01, 0xaa, 0x00); retword = i2c_read(gspca_dev, 0x01); if (retword != 0) return 0x04; /* CS2102 */ start_2wr_probe(gspca_dev, 0x06); /* OmniVision */ reg_w(gspca_dev, 0x08, 0x008d); i2c_write(gspca_dev, 0x11, 0xaa, 0x00); retword = i2c_read(gspca_dev, 0x11); if (retword != 0) { /* (should have returned 0xaa) --> Omnivision? 
*/ /* reg_r 0x10 -> 0x06 --> */ goto ov_check; } start_2wr_probe(gspca_dev, 0x08); /* HDCS2020 */ i2c_write(gspca_dev, 0x1c, 0x00, 0x00); i2c_write(gspca_dev, 0x15, 0xaa, 0x00); retword = i2c_read(gspca_dev, 0x15); if (retword != 0) return 0x08; /* HDCS2020 */ start_2wr_probe(gspca_dev, 0x0a); /* PB0330 */ i2c_write(gspca_dev, 0x07, 0xaa, 0xaa); retword = i2c_read(gspca_dev, 0x07); if (retword != 0) return 0x0a; /* PB0330 */ retword = i2c_read(gspca_dev, 0x03); if (retword != 0) return 0x0a; /* PB0330 ?? */ retword = i2c_read(gspca_dev, 0x04); if (retword != 0) return 0x0a; /* PB0330 ?? */ start_2wr_probe(gspca_dev, 0x0c); /* ICM105A */ i2c_write(gspca_dev, 0x01, 0x11, 0x00); retword = i2c_read(gspca_dev, 0x01); if (retword != 0) return 0x0c; /* ICM105A */ start_2wr_probe(gspca_dev, 0x0e); /* PAS202BCB */ reg_w(gspca_dev, 0x08, 0x008d); i2c_write(gspca_dev, 0x03, 0xaa, 0x00); msleep(50); retword = i2c_read(gspca_dev, 0x03); if (retword != 0) { send_unknown(gspca_dev, SENSOR_PAS202B); return 0x0e; /* PAS202BCB */ } start_2wr_probe(gspca_dev, 0x02); /* TAS5130C */ i2c_write(gspca_dev, 0x01, 0xaa, 0x00); retword = i2c_read(gspca_dev, 0x01); if (retword != 0) return 0x02; /* TAS5130C */ ov_check: reg_r(gspca_dev, 0x0010); /* ?? */ reg_r(gspca_dev, 0x0010); reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0x06, 0x0010); /* OmniVision */ reg_w(gspca_dev, 0xa1, 0x008b); reg_w(gspca_dev, 0x08, 0x008d); msleep(500); reg_w(gspca_dev, 0x01, 0x0012); i2c_write(gspca_dev, 0x12, 0x80, 0x00); /* sensor reset */ retword = i2c_read(gspca_dev, 0x0a) << 8; retword |= i2c_read(gspca_dev, 0x0b); gspca_dbg(gspca_dev, D_PROBE, "probe 2wr ov vga 0x%04x\n", retword); switch (retword) { case 0x7631: /* OV7630C */ reg_w(gspca_dev, 0x06, 0x0010); break; case 0x7620: /* OV7620 */ case 0x7648: /* OV7648 */ break; default: return -1; /* not OmniVision */ } return retword; } struct sensor_by_chipset_revision { u16 revision; u8 internal_sensor_id; }; static const struct sensor_by_chipset_revision chipset_revision_sensor[] = { {0xc000, 0x12}, /* TAS5130C */ {0xc001, 0x13}, /* MT9V111 */ {0xe001, 0x13}, {0x8001, 0x13}, {0x8000, 0x14}, /* CS2102K */ {0x8400, 0x15}, /* MT9V111 */ {0xe400, 0x15}, }; static int vga_3wr_probe(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int i; u16 retword; /*fixme: lack of 8b=b3 (11,12)-> 10, 8b=e0 (14,15,16)-> 12 found in gspcav1*/ reg_w(gspca_dev, 0x02, 0x0010); reg_r(gspca_dev, 0x0010); reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, 0x00, 0x0010); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0x91, 0x008b); reg_w(gspca_dev, 0x03, 0x0012); reg_w(gspca_dev, 0x01, 0x0012); reg_w(gspca_dev, 0x05, 0x0012); retword = i2c_read(gspca_dev, 0x14); if (retword != 0) return 0x11; /* HV7131R */ retword = i2c_read(gspca_dev, 0x15); if (retword != 0) return 0x11; /* HV7131R */ retword = i2c_read(gspca_dev, 0x16); if (retword != 0) return 0x11; /* HV7131R */ reg_w(gspca_dev, 0x02, 0x0010); retword = reg_r(gspca_dev, 0x000b) << 8; retword |= reg_r(gspca_dev, 0x000a); gspca_dbg(gspca_dev, D_PROBE, "probe 3wr vga 1 0x%04x\n", retword); reg_r(gspca_dev, 0x0010); if ((retword & 0xff00) == 0x6400) return 0x02; /* TAS5130C */ for (i = 0; i < ARRAY_SIZE(chipset_revision_sensor); i++) { if (chipset_revision_sensor[i].revision == retword) { sd->chip_revision = retword; send_unknown(gspca_dev, SENSOR_PB0330); return chipset_revision_sensor[i].internal_sensor_id; } } reg_w(gspca_dev, 0x01, 0x0000); /* check PB0330 */ reg_w(gspca_dev, 0x01, 0x0001); 
reg_w(gspca_dev, 0xdd, 0x008b); reg_w(gspca_dev, 0x0a, 0x0010); reg_w(gspca_dev, 0x03, 0x0012); reg_w(gspca_dev, 0x01, 0x0012); retword = i2c_read(gspca_dev, 0x00); if (retword != 0) { gspca_dbg(gspca_dev, D_PROBE, "probe 3wr vga type 0a\n"); return 0x0a; /* PB0330 */ } /* probe gc0303 / gc0305 */ reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0x98, 0x008b); reg_w(gspca_dev, 0x01, 0x0010); reg_w(gspca_dev, 0x03, 0x0012); msleep(2); reg_w(gspca_dev, 0x01, 0x0012); retword = i2c_read(gspca_dev, 0x00); if (retword != 0) { gspca_dbg(gspca_dev, D_PROBE, "probe 3wr vga type %02x\n", retword); if (retword == 0x0011) /* gc0303 */ return 0x0303; if (retword == 0x0029) /* gc0305 */ send_unknown(gspca_dev, SENSOR_GC0305); return retword; } reg_w(gspca_dev, 0x01, 0x0000); /* check OmniVision */ reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0xa1, 0x008b); reg_w(gspca_dev, 0x08, 0x008d); reg_w(gspca_dev, 0x06, 0x0010); reg_w(gspca_dev, 0x01, 0x0012); reg_w(gspca_dev, 0x05, 0x0012); if (i2c_read(gspca_dev, 0x1c) == 0x007f /* OV7610 - manufacturer ID */ && i2c_read(gspca_dev, 0x1d) == 0x00a2) { send_unknown(gspca_dev, SENSOR_OV7620); return 0x06; /* OmniVision confirm ? */ } reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, 0x00, 0x0002); reg_w(gspca_dev, 0x01, 0x0010); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0xee, 0x008b); reg_w(gspca_dev, 0x03, 0x0012); reg_w(gspca_dev, 0x01, 0x0012); reg_w(gspca_dev, 0x05, 0x0012); retword = i2c_read(gspca_dev, 0x00) << 8; /* ID 0 */ retword |= i2c_read(gspca_dev, 0x01); /* ID 1 */ gspca_dbg(gspca_dev, D_PROBE, "probe 3wr vga 2 0x%04x\n", retword); if (retword == 0x2030) { u8 retbyte; retbyte = i2c_read(gspca_dev, 0x02); /* revision number */ gspca_dbg(gspca_dev, D_PROBE, "sensor PO2030 rev 0x%02x\n", retbyte); send_unknown(gspca_dev, SENSOR_PO2030); return retword; } reg_w(gspca_dev, 0x01, 0x0000); reg_w(gspca_dev, 0x0a, 0x0010); reg_w(gspca_dev, 0xd3, 0x008b); reg_w(gspca_dev, 0x01, 0x0001); reg_w(gspca_dev, 0x03, 0x0012); reg_w(gspca_dev, 0x01, 0x0012); reg_w(gspca_dev, 0x05, 0x0012); reg_w(gspca_dev, 0xd3, 0x008b); retword = i2c_read(gspca_dev, 0x01); if (retword != 0) { gspca_dbg(gspca_dev, D_PROBE, "probe 3wr vga type 0a ? 
ret: %04x\n", retword); return 0x16; /* adcm2700 (6100/6200) */ } return -1; } static int zcxx_probeSensor(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int sensor; switch (sd->sensor) { case SENSOR_MC501CB: return -1; /* don't probe */ case SENSOR_GC0303: /* may probe but with no write in reg 0x0010 */ return -1; /* don't probe */ case SENSOR_PAS106: sensor = sif_probe(gspca_dev); if (sensor >= 0) return sensor; break; } sensor = vga_2wr_probe(gspca_dev); if (sensor >= 0) return sensor; return vga_3wr_probe(gspca_dev); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; if (id->idProduct == 0x301b) sd->bridge = BRIDGE_ZC301; else sd->bridge = BRIDGE_ZC303; /* define some sensors from the vendor/product */ sd->sensor = id->driver_info; sd->reg08 = REG08_DEF; INIT_WORK(&sd->work, transfer_update); return 0; } static int zcxx_g_volatile_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; switch (ctrl->id) { case V4L2_CID_AUTOGAIN: gspca_dev->usb_err = 0; if (ctrl->val && sd->exposure && gspca_dev->streaming) sd->exposure->val = getexposure(gspca_dev); return gspca_dev->usb_err; } return -EINVAL; } static int zcxx_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; int i, qual; gspca_dev->usb_err = 0; if (ctrl->id == V4L2_CID_JPEG_COMPRESSION_QUALITY) { qual = sd->reg08 >> 1; for (i = 0; i < ARRAY_SIZE(jpeg_qual); i++) { if (ctrl->val <= jpeg_qual[i]) break; } if (i == ARRAY_SIZE(jpeg_qual) || (i > 0 && i == qual && ctrl->val < jpeg_qual[i])) i--; /* With high quality settings we need max bandwidth */ if (i >= 2 && gspca_dev->streaming && !gspca_dev->cam.needs_full_bandwidth) return -EBUSY; sd->reg08 = (i << 1) | 1; ctrl->val = jpeg_qual[i]; } if (!gspca_dev->streaming) return 0; switch (ctrl->id) { /* gamma/brightness/contrast cluster */ case V4L2_CID_GAMMA: setcontrast(gspca_dev, sd->gamma->val, sd->brightness->val, sd->contrast->val); break; /* autogain/exposure cluster */ case V4L2_CID_AUTOGAIN: setautogain(gspca_dev, ctrl->val); if (!gspca_dev->usb_err && !ctrl->val && sd->exposure) setexposure(gspca_dev, sd->exposure->val); break; case V4L2_CID_POWER_LINE_FREQUENCY: setlightfreq(gspca_dev, ctrl->val); break; case V4L2_CID_SHARPNESS: setsharpness(gspca_dev, ctrl->val); break; case V4L2_CID_JPEG_COMPRESSION_QUALITY: setquality(gspca_dev); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops zcxx_ctrl_ops = { .g_volatile_ctrl = zcxx_g_volatile_ctrl, .s_ctrl = zcxx_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; static const u8 gamma[SENSOR_MAX] = { [SENSOR_ADCM2700] = 4, [SENSOR_CS2102] = 4, [SENSOR_CS2102K] = 5, [SENSOR_GC0303] = 3, [SENSOR_GC0305] = 4, [SENSOR_HDCS2020] = 4, [SENSOR_HV7131B] = 4, [SENSOR_HV7131R] = 4, [SENSOR_ICM105A] = 4, [SENSOR_MC501CB] = 4, [SENSOR_MT9V111_1] = 4, [SENSOR_MT9V111_3] = 4, [SENSOR_OV7620] = 3, [SENSOR_OV7630C] = 4, [SENSOR_PAS106] = 4, [SENSOR_PAS202B] = 4, [SENSOR_PB0330] = 4, [SENSOR_PO2030] = 4, [SENSOR_TAS5130C] = 3, }; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 8); sd->brightness = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, 
V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); sd->contrast = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_CONTRAST, 0, 255, 1, 128); sd->gamma = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_GAMMA, 1, 6, 1, gamma[sd->sensor]); if (sd->sensor == SENSOR_HV7131R) sd->exposure = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_EXPOSURE, 0x30d, 0x493e, 1, 0x927); else if (sd->sensor == SENSOR_OV7620) sd->exposure = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_EXPOSURE, 0, 255, 1, 0x41); sd->autogain = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (sd->sensor != SENSOR_OV7630C) sd->plfreq = v4l2_ctrl_new_std_menu(hdl, &zcxx_ctrl_ops, V4L2_CID_POWER_LINE_FREQUENCY, V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0, V4L2_CID_POWER_LINE_FREQUENCY_DISABLED); sd->sharpness = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_SHARPNESS, 0, 3, 1, sd->sensor == SENSOR_PO2030 ? 0 : 2); sd->jpegqual = v4l2_ctrl_new_std(hdl, &zcxx_ctrl_ops, V4L2_CID_JPEG_COMPRESSION_QUALITY, jpeg_qual[0], jpeg_qual[ARRAY_SIZE(jpeg_qual) - 1], 1, jpeg_qual[REG08_DEF >> 1]); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_cluster(3, &sd->gamma); if (sd->sensor == SENSOR_HV7131R || sd->sensor == SENSOR_OV7620) v4l2_ctrl_auto_cluster(2, &sd->autogain, 0, true); return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; int sensor; static const u8 mode_tb[SENSOR_MAX] = { [SENSOR_ADCM2700] = 2, [SENSOR_CS2102] = 1, [SENSOR_CS2102K] = 1, [SENSOR_GC0303] = 1, [SENSOR_GC0305] = 1, [SENSOR_HDCS2020] = 1, [SENSOR_HV7131B] = 1, [SENSOR_HV7131R] = 1, [SENSOR_ICM105A] = 1, [SENSOR_MC501CB] = 2, [SENSOR_MT9V111_1] = 1, [SENSOR_MT9V111_3] = 1, [SENSOR_OV7620] = 2, [SENSOR_OV7630C] = 1, [SENSOR_PAS106] = 0, [SENSOR_PAS202B] = 1, [SENSOR_PB0330] = 1, [SENSOR_PO2030] = 1, [SENSOR_TAS5130C] = 1, }; sensor = zcxx_probeSensor(gspca_dev); if (sensor >= 0) gspca_dbg(gspca_dev, D_PROBE, "probe sensor -> %04x\n", sensor); if ((unsigned) force_sensor < SENSOR_MAX) { sd->sensor = force_sensor; gspca_dbg(gspca_dev, D_PROBE, "sensor forced to %d\n", force_sensor); } else { switch (sensor) { case -1: switch (sd->sensor) { case SENSOR_MC501CB: gspca_dbg(gspca_dev, D_PROBE, "Sensor MC501CB\n"); break; case SENSOR_GC0303: gspca_dbg(gspca_dev, D_PROBE, "Sensor GC0303\n"); break; default: pr_warn("Unknown sensor - set to TAS5130C\n"); sd->sensor = SENSOR_TAS5130C; } break; case 0: /* check the sensor type */ sensor = i2c_read(gspca_dev, 0x00); gspca_dbg(gspca_dev, D_PROBE, "Sensor hv7131 type %d\n", sensor); switch (sensor) { case 0: /* hv7131b */ case 1: /* hv7131e */ gspca_dbg(gspca_dev, D_PROBE, "Find Sensor HV7131B\n"); sd->sensor = SENSOR_HV7131B; break; default: /* case 2: * hv7131r */ gspca_dbg(gspca_dev, D_PROBE, "Find Sensor HV7131R\n"); sd->sensor = SENSOR_HV7131R; break; } break; case 0x02: gspca_dbg(gspca_dev, D_PROBE, "Sensor TAS5130C\n"); sd->sensor = SENSOR_TAS5130C; break; case 0x04: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor CS2102\n"); sd->sensor = SENSOR_CS2102; break; case 0x08: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor HDCS2020\n"); sd->sensor = SENSOR_HDCS2020; break; case 0x0a: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor PB0330. 
Chip revision %x\n", sd->chip_revision); sd->sensor = SENSOR_PB0330; break; case 0x0c: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor ICM105A\n"); sd->sensor = SENSOR_ICM105A; break; case 0x0e: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor PAS202B\n"); sd->sensor = SENSOR_PAS202B; break; case 0x0f: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor PAS106\n"); sd->sensor = SENSOR_PAS106; break; case 0x10: case 0x12: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor TAS5130C\n"); sd->sensor = SENSOR_TAS5130C; break; case 0x11: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor HV7131R\n"); sd->sensor = SENSOR_HV7131R; break; case 0x13: case 0x15: gspca_dbg(gspca_dev, D_PROBE, "Sensor MT9V111. Chip revision %04x\n", sd->chip_revision); sd->sensor = sd->bridge == BRIDGE_ZC301 ? SENSOR_MT9V111_1 : SENSOR_MT9V111_3; break; case 0x14: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor CS2102K?. Chip revision %x\n", sd->chip_revision); sd->sensor = SENSOR_CS2102K; break; case 0x16: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor ADCM2700\n"); sd->sensor = SENSOR_ADCM2700; break; case 0x29: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor GC0305\n"); sd->sensor = SENSOR_GC0305; break; case 0x0303: gspca_dbg(gspca_dev, D_PROBE, "Sensor GC0303\n"); sd->sensor = SENSOR_GC0303; break; case 0x2030: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor PO2030\n"); sd->sensor = SENSOR_PO2030; break; case 0x7620: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor OV7620\n"); sd->sensor = SENSOR_OV7620; break; case 0x7631: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor OV7630C\n"); sd->sensor = SENSOR_OV7630C; break; case 0x7648: gspca_dbg(gspca_dev, D_PROBE, "Find Sensor OV7648\n"); sd->sensor = SENSOR_OV7620; /* same sensor (?) */ break; default: pr_err("Unknown sensor %04x\n", sensor); return -EINVAL; } } if (sensor < 0x20) { if (sensor == -1 || sensor == 0x10 || sensor == 0x12) reg_w(gspca_dev, 0x02, 0x0010); reg_r(gspca_dev, 0x0010); } cam = &gspca_dev->cam; switch (mode_tb[sd->sensor]) { case 0: cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); break; case 1: cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); break; default: /* case 2: */ cam->cam_mode = broken_vga_mode; cam->nmodes = ARRAY_SIZE(broken_vga_mode); break; } /* switch off the led */ reg_w(gspca_dev, 0x01, 0x0000); return gspca_dev->usb_err; } static int sd_pre_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; gspca_dev->cam.needs_full_bandwidth = (sd->reg08 >= 4) ? 
1 : 0; return 0; } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int mode; static const struct usb_action *init_tb[SENSOR_MAX][2] = { [SENSOR_ADCM2700] = {adcm2700_Initial, adcm2700_InitialScale}, [SENSOR_CS2102] = {cs2102_Initial, cs2102_InitialScale}, [SENSOR_CS2102K] = {cs2102K_Initial, cs2102K_InitialScale}, [SENSOR_GC0303] = {gc0303_Initial, gc0303_InitialScale}, [SENSOR_GC0305] = {gc0305_Initial, gc0305_InitialScale}, [SENSOR_HDCS2020] = {hdcs2020_Initial, hdcs2020_InitialScale}, [SENSOR_HV7131B] = {hv7131b_Initial, hv7131b_InitialScale}, [SENSOR_HV7131R] = {hv7131r_Initial, hv7131r_InitialScale}, [SENSOR_ICM105A] = {icm105a_Initial, icm105a_InitialScale}, [SENSOR_MC501CB] = {mc501cb_Initial, mc501cb_InitialScale}, [SENSOR_MT9V111_1] = {mt9v111_1_Initial, mt9v111_1_InitialScale}, [SENSOR_MT9V111_3] = {mt9v111_3_Initial, mt9v111_3_InitialScale}, [SENSOR_OV7620] = {ov7620_Initial, ov7620_InitialScale}, [SENSOR_OV7630C] = {ov7630c_Initial, ov7630c_InitialScale}, [SENSOR_PAS106] = {pas106b_Initial, pas106b_InitialScale}, [SENSOR_PAS202B] = {pas202b_Initial, pas202b_InitialScale}, [SENSOR_PB0330] = {pb0330_Initial, pb0330_InitialScale}, [SENSOR_PO2030] = {po2030_Initial, po2030_InitialScale}, [SENSOR_TAS5130C] = {tas5130c_Initial, tas5130c_InitialScale}, }; /* create the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x21); /* JPEG 422 */ mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; switch (sd->sensor) { case SENSOR_HV7131R: zcxx_probeSensor(gspca_dev); break; case SENSOR_PAS106: usb_exchange(gspca_dev, pas106b_Initial_com); break; } usb_exchange(gspca_dev, init_tb[sd->sensor][mode]); switch (sd->sensor) { case SENSOR_ADCM2700: case SENSOR_GC0305: case SENSOR_OV7620: case SENSOR_PO2030: case SENSOR_TAS5130C: case SENSOR_GC0303: /* msleep(100); * ?? */ reg_r(gspca_dev, 0x0002); /* --> 0x40 */ reg_w(gspca_dev, 0x09, 0x01ad); /* (from win traces) */ reg_w(gspca_dev, 0x15, 0x01ae); if (sd->sensor == SENSOR_TAS5130C) break; reg_w(gspca_dev, 0x0d, 0x003a); reg_w(gspca_dev, 0x02, 0x003b); reg_w(gspca_dev, 0x00, 0x0038); break; case SENSOR_HV7131R: case SENSOR_PAS202B: reg_w(gspca_dev, 0x03, 0x003b); reg_w(gspca_dev, 0x0c, 0x003a); reg_w(gspca_dev, 0x0b, 0x0039); if (sd->sensor == SENSOR_HV7131R) reg_w(gspca_dev, 0x50, ZC3XX_R11D_GLOBALGAIN); break; } setmatrix(gspca_dev); switch (sd->sensor) { case SENSOR_ADCM2700: case SENSOR_OV7620: reg_r(gspca_dev, 0x0008); reg_w(gspca_dev, 0x00, 0x0008); break; case SENSOR_PAS202B: case SENSOR_GC0305: case SENSOR_HV7131R: case SENSOR_TAS5130C: reg_r(gspca_dev, 0x0008); fallthrough; case SENSOR_PO2030: reg_w(gspca_dev, 0x03, 0x0008); break; } setsharpness(gspca_dev, v4l2_ctrl_g_ctrl(sd->sharpness)); /* set the gamma tables when not set */ switch (sd->sensor) { case SENSOR_CS2102K: /* gamma set in xxx_Initial */ case SENSOR_HDCS2020: case SENSOR_OV7630C: break; default: setcontrast(gspca_dev, v4l2_ctrl_g_ctrl(sd->gamma), v4l2_ctrl_g_ctrl(sd->brightness), v4l2_ctrl_g_ctrl(sd->contrast)); break; } setmatrix(gspca_dev); /* one more time? 
*/ switch (sd->sensor) { case SENSOR_OV7620: case SENSOR_PAS202B: reg_r(gspca_dev, 0x0180); /* from win */ reg_w(gspca_dev, 0x00, 0x0180); break; } setquality(gspca_dev); /* Start with BRC disabled, transfer_update will enable it if needed */ reg_w(gspca_dev, 0x00, 0x0007); if (sd->plfreq) setlightfreq(gspca_dev, v4l2_ctrl_g_ctrl(sd->plfreq)); switch (sd->sensor) { case SENSOR_ADCM2700: reg_w(gspca_dev, 0x09, 0x01ad); /* (from win traces) */ reg_w(gspca_dev, 0x15, 0x01ae); reg_w(gspca_dev, 0x02, 0x0180); /* ms-win + */ reg_w(gspca_dev, 0x40, 0x0117); break; case SENSOR_HV7131R: setexposure(gspca_dev, v4l2_ctrl_g_ctrl(sd->exposure)); reg_w(gspca_dev, 0x00, ZC3XX_R1A7_CALCGLOBALMEAN); break; case SENSOR_GC0305: case SENSOR_TAS5130C: reg_w(gspca_dev, 0x09, 0x01ad); /* (from win traces) */ reg_w(gspca_dev, 0x15, 0x01ae); fallthrough; case SENSOR_PAS202B: case SENSOR_PO2030: /* reg_w(gspca_dev, 0x40, ZC3XX_R117_GGAIN); in win traces */ reg_r(gspca_dev, 0x0180); break; case SENSOR_OV7620: reg_w(gspca_dev, 0x09, 0x01ad); reg_w(gspca_dev, 0x15, 0x01ae); i2c_read(gspca_dev, 0x13); /*fixme: returns 0xa3 */ i2c_write(gspca_dev, 0x13, 0xa3, 0x00); /*fixme: returned value to send? */ reg_w(gspca_dev, 0x40, 0x0117); reg_r(gspca_dev, 0x0180); break; } setautogain(gspca_dev, v4l2_ctrl_g_ctrl(sd->autogain)); if (gspca_dev->usb_err < 0) return gspca_dev->usb_err; /* Start the transfer parameters update thread */ schedule_work(&sd->work); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; mutex_unlock(&gspca_dev->usb_lock); flush_work(&sd->work); mutex_lock(&gspca_dev->usb_lock); if (!gspca_dev->present) return; send_unknown(gspca_dev, sd->sensor); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, int len) { struct sd *sd = (struct sd *) gspca_dev; /* check the JPEG end of frame */ if (len >= 3 && data[len - 3] == 0xff && data[len - 2] == 0xd9) { /*fixme: what does the last byte mean?*/ gspca_frame_add(gspca_dev, LAST_PACKET, data, len - 1); return; } /* check the JPEG start of a frame */ if (data[0] == 0xff && data[1] == 0xd8) { /* put the JPEG header in the new frame */ gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); /* remove the webcam's header: * ff d8 ff fe 00 0e 00 00 ss ss 00 01 ww ww hh hh pp pp * - 'ss ss' is the frame sequence number (BE) * - 'ww ww' and 'hh hh' are the window dimensions (BE) * - 'pp pp' is the packet sequence number (BE) */ data += 18; len -= 18; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_set_jcomp(struct gspca_dev *gspca_dev, const struct v4l2_jpegcompression *jcomp) { struct sd *sd = (struct sd *) gspca_dev; return v4l2_ctrl_s_ctrl(sd->jpegqual, jcomp->quality); } static int sd_get_jcomp(struct gspca_dev *gspca_dev, struct v4l2_jpegcompression *jcomp) { struct sd *sd = (struct sd *) gspca_dev; memset(jcomp, 0, sizeof *jcomp); jcomp->quality = v4l2_ctrl_g_ctrl(sd->jpegqual); jcomp->jpeg_markers = V4L2_JPEG_MARKER_DHT | V4L2_JPEG_MARKER_DQT; return 0; } #if IS_ENABLED(CONFIG_INPUT) static int sd_int_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* interrupt packet data */ int len) /* interrupt packet length */ { if (len == 8 && data[4] == 1) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 1); input_sync(gspca_dev->input_dev); input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); } return 0; } #endif static const 
struct sd_desc sd_desc = { .name = KBUILD_MODNAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_pre_start, .start = sd_start, .stop0 = sd_stop0, .pkt_scan = sd_pkt_scan, .get_jcomp = sd_get_jcomp, .set_jcomp = sd_set_jcomp, #if IS_ENABLED(CONFIG_INPUT) .int_pkt_scan = sd_int_pkt_scan, #endif }; static const struct usb_device_id device_table[] = { {USB_DEVICE(0x03f0, 0x1b07)}, {USB_DEVICE(0x041e, 0x041e)}, {USB_DEVICE(0x041e, 0x4017)}, {USB_DEVICE(0x041e, 0x401c), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x041e, 0x401e)}, {USB_DEVICE(0x041e, 0x401f)}, {USB_DEVICE(0x041e, 0x4022)}, {USB_DEVICE(0x041e, 0x4029)}, {USB_DEVICE(0x041e, 0x4034), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x041e, 0x4035), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x041e, 0x4036)}, {USB_DEVICE(0x041e, 0x403a)}, {USB_DEVICE(0x041e, 0x4051), .driver_info = SENSOR_GC0303}, {USB_DEVICE(0x041e, 0x4053), .driver_info = SENSOR_GC0303}, {USB_DEVICE(0x0458, 0x7007)}, {USB_DEVICE(0x0458, 0x700c)}, {USB_DEVICE(0x0458, 0x700f)}, {USB_DEVICE(0x0461, 0x0a00)}, {USB_DEVICE(0x046d, 0x089d), .driver_info = SENSOR_MC501CB}, {USB_DEVICE(0x046d, 0x08a0)}, {USB_DEVICE(0x046d, 0x08a1)}, {USB_DEVICE(0x046d, 0x08a2)}, {USB_DEVICE(0x046d, 0x08a3)}, {USB_DEVICE(0x046d, 0x08a6)}, {USB_DEVICE(0x046d, 0x08a7)}, {USB_DEVICE(0x046d, 0x08a9)}, {USB_DEVICE(0x046d, 0x08aa)}, {USB_DEVICE(0x046d, 0x08ac)}, {USB_DEVICE(0x046d, 0x08ad)}, {USB_DEVICE(0x046d, 0x08ae)}, {USB_DEVICE(0x046d, 0x08af)}, {USB_DEVICE(0x046d, 0x08b9)}, {USB_DEVICE(0x046d, 0x08d7)}, {USB_DEVICE(0x046d, 0x08d8)}, {USB_DEVICE(0x046d, 0x08d9)}, {USB_DEVICE(0x046d, 0x08da)}, {USB_DEVICE(0x046d, 0x08dd), .driver_info = SENSOR_MC501CB}, {USB_DEVICE(0x0471, 0x0325), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x0471, 0x0326), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x0471, 0x032d), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x0471, 0x032e), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x055f, 0xc005)}, {USB_DEVICE(0x055f, 0xd003)}, {USB_DEVICE(0x055f, 0xd004)}, {USB_DEVICE(0x0698, 0x2003)}, {USB_DEVICE(0x0ac8, 0x0301), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x0ac8, 0x0302), .driver_info = SENSOR_PAS106}, {USB_DEVICE(0x0ac8, 0x301b)}, {USB_DEVICE(0x0ac8, 0x303b)}, {USB_DEVICE(0x0ac8, 0x305b)}, {USB_DEVICE(0x0ac8, 0x307b)}, {USB_DEVICE(0x10fd, 0x0128)}, {USB_DEVICE(0x10fd, 0x804d)}, {USB_DEVICE(0x10fd, 0x8050)}, {} /* end of entry */ }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } /* USB driver */ static struct usb_driver sd_driver = { .name = KBUILD_MODNAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); module_param(force_sensor, int, 0644); MODULE_PARM_DESC(force_sensor, "Force sensor. Only for experts!!!");
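/*
 * Illustrative sketch only (not part of the driver): one possible way to
 * decode the 18-byte webcam frame header that sd_pkt_scan() strips, i.e.
 *   ff d8 ff fe 00 0e 00 00 ss ss 00 01 ww ww hh hh pp pp
 * The struct and helper names below are hypothetical and exist purely to
 * make the field layout described in that comment explicit.
 */
struct zc3xx_frame_hdr_fields {
	u16 frame_seq;	/* 'ss ss': frame sequence number (big endian) */
	u16 width;	/* 'ww ww': window width (big endian) */
	u16 height;	/* 'hh hh': window height (big endian) */
	u16 pkt_seq;	/* 'pp pp': packet sequence number (big endian) */
};

static inline void zc3xx_parse_frame_hdr(const u8 *data,
					 struct zc3xx_frame_hdr_fields *f)
{
	/* bytes 0..7 are the fixed prefix ff d8 ff fe 00 0e 00 00 */
	f->frame_seq = (data[8] << 8) | data[9];
	/* bytes 10..11 hold the fixed value 00 01 */
	f->width  = (data[12] << 8) | data[13];
	f->height = (data[14] << 8) | data[15];
	f->pkt_seq = (data[16] << 8) | data[17];
}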
3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 
4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 
5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 
5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 
6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * * Swap reorganised 29.12.95, Stephen Tweedie. * kswapd added: 7.1.96 sct * Removed kswapd_ctl limits, and swap out as many pages as needed * to bring the system back to freepages.high: 2.4.97, Rik van Riel. * Zone aware kswapd started 02/00, Kanoj Sarcar (kanoj@sgi.com). * Multiqueue VM started 5.8.00, Rik van Riel.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/module.h> #include <linux/gfp.h> #include <linux/kernel_stat.h> #include <linux/swap.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/highmem.h> #include <linux/vmpressure.h> #include <linux/vmstat.h> #include <linux/file.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/buffer_head.h> /* for buffer_heads_over_limit */ #include <linux/mm_inline.h> #include <linux/backing-dev.h> #include <linux/rmap.h> #include <linux/topology.h> #include <linux/cpu.h> #include <linux/cpuset.h> #include <linux/compaction.h> #include <linux/notifier.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/memcontrol.h> #include <linux/migrate.h> #include <linux/delayacct.h> #include <linux/sysctl.h> #include <linux/memory-tiers.h> #include <linux/oom.h> #include <linux/pagevec.h> #include <linux/prefetch.h> #include <linux/printk.h> #include <linux/dax.h> #include <linux/psi.h> #include <linux/pagewalk.h> #include <linux/shmem_fs.h> #include <linux/ctype.h> #include <linux/debugfs.h> #include <linux/khugepaged.h> #include <linux/rculist_nulls.h> #include <linux/random.h> #include <asm/tlbflush.h> #include <asm/div64.h> #include <linux/swapops.h> #include <linux/balloon_compaction.h> #include <linux/sched/sysctl.h> #include "internal.h" #include "swap.h" #define CREATE_TRACE_POINTS #include <trace/events/vmscan.h> struct scan_control { /* How many pages shrink_list() should reclaim */ unsigned long nr_to_reclaim; /* * Nodemask of nodes allowed by the caller. If NULL, all nodes * are scanned. */ nodemask_t *nodemask; /* * The memory cgroup that hit its limit and as a result is the * primary target of this reclaim invocation. */ struct mem_cgroup *target_mem_cgroup; /* * Scan pressure balancing between anon and file LRUs */ unsigned long anon_cost; unsigned long file_cost; #ifdef CONFIG_MEMCG /* Swappiness value for proactive reclaim. Always use sc_swappiness()! */ int *proactive_swappiness; #endif /* Can active folios be deactivated as part of reclaim? */ #define DEACTIVATE_ANON 1 #define DEACTIVATE_FILE 2 unsigned int may_deactivate:2; unsigned int force_deactivate:1; unsigned int skipped_deactivate:1; /* Writepage batching in laptop mode; RECLAIM_WRITE */ unsigned int may_writepage:1; /* Can mapped folios be reclaimed? */ unsigned int may_unmap:1; /* Can folios be swapped as part of reclaim? */ unsigned int may_swap:1; /* Not allow cache_trim_mode to be turned on as part of reclaim? */ unsigned int no_cache_trim_mode:1; /* Has cache_trim_mode failed at least once? */ unsigned int cache_trim_mode_failed:1; /* Proactive reclaim invoked by userspace through memory.reclaim */ unsigned int proactive:1; /* * Cgroup memory below memory.low is protected as long as we * don't threaten to OOM. If any cgroup is reclaimed at * reduced force or passed over entirely due to its memory.low * setting (memcg_low_skipped), and nothing is reclaimed as a * result, then go back for one more cycle that reclaims the protected * memory (memcg_low_reclaim) to avert OOM. 
*/ unsigned int memcg_low_reclaim:1; unsigned int memcg_low_skipped:1; /* Shared cgroup tree walk failed, rescan the whole tree */ unsigned int memcg_full_walk:1; unsigned int hibernation_mode:1; /* One of the zones is ready for compaction */ unsigned int compaction_ready:1; /* There is easily reclaimable cold cache in the current node */ unsigned int cache_trim_mode:1; /* The file folios on the current node are dangerously low */ unsigned int file_is_tiny:1; /* Always discard instead of demoting to lower tier memory */ unsigned int no_demotion:1; /* Allocation order */ s8 order; /* Scan (total_size >> priority) pages at once */ s8 priority; /* The highest zone to isolate folios for reclaim from */ s8 reclaim_idx; /* This context's GFP mask */ gfp_t gfp_mask; /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; /* Number of pages freed so far during a call to shrink_zones() */ unsigned long nr_reclaimed; struct { unsigned int dirty; unsigned int unqueued_dirty; unsigned int congested; unsigned int writeback; unsigned int immediate; unsigned int file_taken; unsigned int taken; } nr; /* for recording the reclaimed slab by now */ struct reclaim_state reclaim_state; }; #ifdef ARCH_HAS_PREFETCHW #define prefetchw_prev_lru_folio(_folio, _base, _field) \ do { \ if ((_folio)->lru.prev != _base) { \ struct folio *prev; \ \ prev = lru_to_folio(&(_folio->lru)); \ prefetchw(&prev->_field); \ } \ } while (0) #else #define prefetchw_prev_lru_folio(_folio, _base, _field) do { } while (0) #endif /* * From 0 .. MAX_SWAPPINESS. Higher means more swappy. */ int vm_swappiness = 60; #ifdef CONFIG_MEMCG /* Returns true for reclaim through cgroup limits or cgroup interfaces. */ static bool cgroup_reclaim(struct scan_control *sc) { return sc->target_mem_cgroup; } /* * Returns true for reclaim on the root cgroup. This is true for direct * allocator reclaim and reclaim through cgroup interfaces on the root cgroup. */ static bool root_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup || mem_cgroup_is_root(sc->target_mem_cgroup); } /** * writeback_throttling_sane - is the usual dirty throttling mechanism available? * @sc: scan_control in question * * The normal page dirty throttling mechanism in balance_dirty_pages() is * completely broken with the legacy memcg and direct stalling in * shrink_folio_list() is used for throttling instead, which lacks all the * niceties such as fairness, adaptive pausing, bandwidth proportional * allocation and configurability. * * This function tests whether the vmscan currently in progress can assume * that the normal dirty throttling mechanism is operational. 
*/ static bool writeback_throttling_sane(struct scan_control *sc) { if (!cgroup_reclaim(sc)) return true; #ifdef CONFIG_CGROUP_WRITEBACK if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return true; #endif return false; } static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg) { if (sc->proactive && sc->proactive_swappiness) return *sc->proactive_swappiness; return mem_cgroup_swappiness(memcg); } #else static bool cgroup_reclaim(struct scan_control *sc) { return false; } static bool root_reclaim(struct scan_control *sc) { return true; } static bool writeback_throttling_sane(struct scan_control *sc) { return true; } static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg) { return READ_ONCE(vm_swappiness); } #endif static void set_task_reclaim_state(struct task_struct *task, struct reclaim_state *rs) { /* Check for an overwrite */ WARN_ON_ONCE(rs && task->reclaim_state); /* Check for the nulling of an already-nulled member */ WARN_ON_ONCE(!rs && !task->reclaim_state); task->reclaim_state = rs; } /* * flush_reclaim_state(): add pages reclaimed outside of LRU-based reclaim to * scan_control->nr_reclaimed. */ static void flush_reclaim_state(struct scan_control *sc) { /* * Currently, reclaim_state->reclaimed includes three types of pages * freed outside of vmscan: * (1) Slab pages. * (2) Clean file pages from pruned inodes (on highmem systems). * (3) XFS freed buffer pages. * * For all of these cases, we cannot universally link the pages to a * single memcg. For example, a memcg-aware shrinker can free one object * charged to the target memcg, causing an entire page to be freed. * If we count the entire page as reclaimed from the memcg, we end up * overestimating the reclaimed amount (potentially under-reclaiming). * * Only count such pages for global reclaim to prevent under-reclaiming * from the target memcg; preventing unnecessary retries during memcg * charging and false positives from proactive reclaim. * * For uncommon cases where the freed pages were actually mostly * charged to the target memcg, we end up underestimating the reclaimed * amount. This should be fine. The freed pages will be uncharged * anyway, even if they are not counted here properly, and we will be * able to make forward progress in charging (which is usually in a * retry loop). * * We can go one step further, and report the uncharged objcg pages in * memcg reclaim, to make reporting more accurate and reduce * underestimation, but it's probably not worth the complexity for now. */ if (current->reclaim_state && root_reclaim(sc)) { sc->nr_reclaimed += current->reclaim_state->reclaimed; current->reclaim_state->reclaimed = 0; } } static bool can_demote(int nid, struct scan_control *sc) { if (!numa_demotion_enabled) return false; if (sc && sc->no_demotion) return false; if (next_demotion_node(nid) == NUMA_NO_NODE) return false; return true; } static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg, int nid, struct scan_control *sc) { if (memcg == NULL) { /* * For non-memcg reclaim, is there * space in any swap device? */ if (get_nr_swap_pages() > 0) return true; } else { /* Is the memcg below its swap limit? */ if (mem_cgroup_get_nr_swap_pages(memcg) > 0) return true; } /* * The page can not be swapped. * * Can it be reclaimed from this node via demotion? */ return can_demote(nid, sc); } /* * This misses isolated folios which are not accounted for to save counters. 
* As the data only determines if reclaim or compaction continues, it is * not expected that isolated folios will be a dominating factor. */ unsigned long zone_reclaimable_pages(struct zone *zone) { unsigned long nr; nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE); if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL)) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); return nr; } /** * lruvec_lru_size - Returns the number of pages on the given LRU list. * @lruvec: lru vector * @lru: lru to use * @zone_idx: zones to consider (use MAX_NR_ZONES - 1 for the whole LRU list) */ static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx) { unsigned long size = 0; int zid; for (zid = 0; zid <= zone_idx; zid++) { struct zone *zone = &lruvec_pgdat(lruvec)->node_zones[zid]; if (!managed_zone(zone)) continue; if (!mem_cgroup_disabled()) size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid); else size += zone_page_state(zone, NR_ZONE_LRU_BASE + lru); } return size; } static unsigned long drop_slab_node(int nid) { unsigned long freed = 0; struct mem_cgroup *memcg = NULL; memcg = mem_cgroup_iter(NULL, NULL, NULL); do { freed += shrink_slab(GFP_KERNEL, nid, memcg, 0); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); return freed; } void drop_slab(void) { int nid; int shift = 0; unsigned long freed; do { freed = 0; for_each_online_node(nid) { if (fatal_signal_pending(current)) return; freed += drop_slab_node(nid); } } while ((freed >> shift++) > 1); } static int reclaimer_offset(void) { BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD); BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != PGSCAN_DIRECT - PGSCAN_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD); if (current_is_kswapd()) return 0; if (current_is_khugepaged()) return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD; return PGSTEAL_DIRECT - PGSTEAL_KSWAPD; } static inline int is_page_cache_freeable(struct folio *folio) { /* * A freeable page cache folio is referenced only by the caller * that isolated the folio, the page cache and optional filesystem * private data at folio->private. */ return folio_ref_count(folio) - folio_test_private(folio) == 1 + folio_nr_pages(folio); } /* * We detected a synchronous write error writing a folio out. Probably * -ENOSPC. We need to propagate that into the address_space for a subsequent * fsync(), msync() or close(). * * The tricky part is that after writepage we cannot touch the mapping: nothing * prevents it from being freed up. But we have a ref on the folio and once * that folio is locked, the mapping is pinned. * * We're allowed to run sleeping folio_lock() here because we know the caller has * __GFP_FS. */ static void handle_write_error(struct address_space *mapping, struct folio *folio, int error) { folio_lock(folio); if (folio_mapping(folio) == mapping) mapping_set_error(mapping, error); folio_unlock(folio); } static bool skip_throttle_noprogress(pg_data_t *pgdat) { int reclaimable = 0, write_pending = 0; int i; /* * If kswapd is disabled, reschedule if necessary but do not * throttle as the system is likely near OOM. 
*/ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) return true; /* * If there are a lot of dirty/writeback folios then do not * throttle as throttling will occur when the folios cycle * towards the end of the LRU if still under writeback. */ for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; reclaimable += zone_reclaimable_pages(zone); write_pending += zone_page_state_snapshot(zone, NR_ZONE_WRITE_PENDING); } if (2 * write_pending <= reclaimable) return true; return false; } void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) { wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; long timeout, ret; DEFINE_WAIT(wait); /* * Do not throttle user workers, kthreads other than kswapd or * workqueues. They may be required for reclaim to make * forward progress (e.g. journalling workqueues or kthreads). */ if (!current_is_kswapd() && current->flags & (PF_USER_WORKER|PF_KTHREAD)) { cond_resched(); return; } /* * These figures are pulled out of thin air. * VMSCAN_THROTTLE_ISOLATED is a transient condition based on too many * parallel reclaimers which is a short-lived event so the timeout is * short. Failing to make progress or waiting on writeback are * potentially long-lived events so use a longer timeout. This is shaky * logic as a failure to make progress could be due to anything from * writeback to a slow device to excessive referenced folios at the tail * of the inactive LRU. */ switch(reason) { case VMSCAN_THROTTLE_WRITEBACK: timeout = HZ/10; if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) { WRITE_ONCE(pgdat->nr_reclaim_start, node_page_state(pgdat, NR_THROTTLED_WRITTEN)); } break; case VMSCAN_THROTTLE_CONGESTED: fallthrough; case VMSCAN_THROTTLE_NOPROGRESS: if (skip_throttle_noprogress(pgdat)) { cond_resched(); return; } timeout = 1; break; case VMSCAN_THROTTLE_ISOLATED: timeout = HZ/50; break; default: WARN_ON_ONCE(1); timeout = HZ; break; } prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = schedule_timeout(timeout); finish_wait(wqh, &wait); if (reason == VMSCAN_THROTTLE_WRITEBACK) atomic_dec(&pgdat->nr_writeback_throttled); trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout), jiffies_to_usecs(timeout - ret), reason); } /* * Account for folios written if tasks are throttled waiting on dirty * folios to clean. If enough folios have been cleaned since throttling * started then wakeup the throttled tasks. */ void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio, int nr_throttled) { unsigned long nr_written; node_stat_add_folio(folio, NR_THROTTLED_WRITTEN); /* * This is an inaccurate read as the per-cpu deltas may not * be synchronised. However, given that the system is * writeback throttled, it is not worth taking the penalty * of getting an accurate count. At worst, the throttle * timeout guarantees forward progress. */ nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) - READ_ONCE(pgdat->nr_reclaim_start); if (nr_written > SWAP_CLUSTER_MAX * nr_throttled) wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]); } /* possible outcome of pageout() */ typedef enum { /* failed to write folio out, folio is locked */ PAGE_KEEP, /* move folio to the active list, folio is locked */ PAGE_ACTIVATE, /* folio has been sent to the disk successfully, folio is unlocked */ PAGE_SUCCESS, /* folio is clean and locked */ PAGE_CLEAN, } pageout_t; /* * pageout is called by shrink_folio_list() for each dirty folio. * Calls ->writepage(). 
*/ static pageout_t pageout(struct folio *folio, struct address_space *mapping, struct swap_iocb **plug) { /* * If the folio is dirty, only perform writeback if that write * will be non-blocking. To prevent this allocation from being * stalled by pagecache activity. But note that there may be * stalls if we need to run get_block(). We could test * PagePrivate for that. * * If this process is currently in __generic_file_write_iter() against * this folio's queue, we can perform writeback even if that * will block. * * If the folio is swapcache, write it back even if that would * block, for some throttling. This happens by accident, because * swap_backing_dev_info is bust: it doesn't reflect the * congestion state of the swapdevs. Easy to fix, if needed. */ if (!is_page_cache_freeable(folio)) return PAGE_KEEP; if (!mapping) { /* * Some data journaling orphaned folios can have * folio->mapping == NULL while being dirty with clean buffers. */ if (folio_test_private(folio)) { if (try_to_free_buffers(folio)) { folio_clear_dirty(folio); pr_info("%s: orphaned folio\n", __func__); return PAGE_CLEAN; } } return PAGE_KEEP; } if (mapping->a_ops->writepage == NULL) return PAGE_ACTIVATE; if (folio_clear_dirty_for_io(folio)) { int res; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .nr_to_write = SWAP_CLUSTER_MAX, .range_start = 0, .range_end = LLONG_MAX, .for_reclaim = 1, .swap_plug = plug, }; folio_set_reclaim(folio); res = mapping->a_ops->writepage(&folio->page, &wbc); if (res < 0) handle_write_error(mapping, folio, res); if (res == AOP_WRITEPAGE_ACTIVATE) { folio_clear_reclaim(folio); return PAGE_ACTIVATE; } if (!folio_test_writeback(folio)) { /* synchronous write or broken a_ops? */ folio_clear_reclaim(folio); } trace_mm_vmscan_write_folio(folio); node_stat_add_folio(folio, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } return PAGE_CLEAN; } /* * Same as remove_mapping, but if the folio is removed from the mapping, it * gets returned with a refcount of 0. */ static int __remove_mapping(struct address_space *mapping, struct folio *folio, bool reclaimed, struct mem_cgroup *target_memcg) { int refcount; void *shadow = NULL; BUG_ON(!folio_test_locked(folio)); BUG_ON(mapping != folio_mapping(folio)); if (!folio_test_swapcache(folio)) spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); /* * The non racy check for a busy folio. * * Must be careful with the order of the tests. When someone has * a ref to the folio, it may be possible that they dirty it then * drop the reference. So if the dirty flag is tested before the * refcount here, then the following race may occur: * * get_user_pages(&page); * [user mapping goes away] * write_to(page); * !folio_test_dirty(folio) [good] * folio_set_dirty(folio); * folio_put(folio); * !refcount(folio) [good, discard it] * * [oops, our write_to data is lost] * * Reversing the order of the tests ensures such a situation cannot * escape unnoticed. The smp_rmb is needed to ensure the folio->flags * load is not satisfied before that of folio->_refcount. * * Note that if the dirty flag is always set via folio_mark_dirty, * and thus under the i_pages lock, then this ordering is not required. 
*/ refcount = 1 + folio_nr_pages(folio); if (!folio_ref_freeze(folio, refcount)) goto cannot_free; /* note: atomic_cmpxchg in folio_ref_freeze provides the smp_rmb */ if (unlikely(folio_test_dirty(folio))) { folio_ref_unfreeze(folio, refcount); goto cannot_free; } if (folio_test_swapcache(folio)) { swp_entry_t swap = folio->swap; if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(folio, target_memcg); __delete_from_swap_cache(folio, swap, shadow); mem_cgroup_swapout(folio, swap); xa_unlock_irq(&mapping->i_pages); put_swap_folio(folio, swap); } else { void (*free_folio)(struct folio *); free_folio = mapping->a_ops->free_folio; /* * Remember a shadow entry for reclaimed file cache in * order to detect refaults, thus thrashing, later on. * * But don't store shadows in an address space that is * already exiting. This is not just an optimization, * inode reclaim needs to empty out the radix tree or * the nodes are lost. Don't plant shadows behind its * back. * * We also don't store shadows for DAX mappings because the * only page cache folios found in these are zero pages * covering holes, and because we don't want to mix DAX * exceptional entries and shadow exceptional entries in the * same address_space. */ if (reclaimed && folio_is_file_lru(folio) && !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(folio, target_memcg); __filemap_remove_folio(folio, shadow); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); if (free_folio) free_folio(folio); } return 1; cannot_free: xa_unlock_irq(&mapping->i_pages); if (!folio_test_swapcache(folio)) spin_unlock(&mapping->host->i_lock); return 0; } /** * remove_mapping() - Attempt to remove a folio from its mapping. * @mapping: The address space. * @folio: The folio to remove. * * If the folio is dirty, under writeback or if someone else has a ref * on it, removal will fail. * Return: The number of pages removed from the mapping. 0 if the folio * could not be removed. * Context: The caller should have a single refcount on the folio and * hold its lock. */ long remove_mapping(struct address_space *mapping, struct folio *folio) { if (__remove_mapping(mapping, folio, false, NULL)) { /* * Unfreezing the refcount with 1 effectively * drops the pagecache ref for us without requiring another * atomic operation. */ folio_ref_unfreeze(folio, 1); return folio_nr_pages(folio); } return 0; } /** * folio_putback_lru - Put previously isolated folio onto appropriate LRU list. * @folio: Folio to be returned to an LRU list. * * Add previously isolated @folio to appropriate LRU list. * The folio may still be unevictable for other reasons. * * Context: lru_lock must not be held, interrupts must be enabled. */ void folio_putback_lru(struct folio *folio) { folio_add_lru(folio); folio_put(folio); /* drop ref from isolate */ } enum folio_references { FOLIOREF_RECLAIM, FOLIOREF_RECLAIM_CLEAN, FOLIOREF_KEEP, FOLIOREF_ACTIVATE, }; static enum folio_references folio_check_references(struct folio *folio, struct scan_control *sc) { int referenced_ptes, referenced_folio; unsigned long vm_flags; referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup, &vm_flags); referenced_folio = folio_test_clear_referenced(folio); /* * The supposedly reclaimable folio was found to be in a VM_LOCKED vma. * Let the folio, now marked Mlocked, be moved to the unevictable list. 
*/ if (vm_flags & VM_LOCKED) return FOLIOREF_ACTIVATE; /* rmap lock contention: rotate */ if (referenced_ptes == -1) return FOLIOREF_KEEP; if (referenced_ptes) { /* * All mapped folios start out with page table * references from the instantiating fault, so we need * to look twice if a mapped file/anon folio is used more * than once. * * Mark it and spare it for another trip around the * inactive list. Another page table reference will * lead to its activation. * * Note: the mark is set for activated folios as well * so that recently deactivated but used folios are * quickly recovered. */ folio_set_referenced(folio); if (referenced_folio || referenced_ptes > 1) return FOLIOREF_ACTIVATE; /* * Activate file-backed executable folios after first usage. */ if ((vm_flags & VM_EXEC) && folio_is_file_lru(folio)) return FOLIOREF_ACTIVATE; return FOLIOREF_KEEP; } /* Reclaim if clean, defer dirty folios to writeback */ if (referenced_folio && folio_is_file_lru(folio)) return FOLIOREF_RECLAIM_CLEAN; return FOLIOREF_RECLAIM; } /* Check if a folio is dirty or under writeback */ static void folio_check_dirty_writeback(struct folio *folio, bool *dirty, bool *writeback) { struct address_space *mapping; /* * Anonymous folios are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them. * MADV_FREE anonymous folios are put into inactive file list too. * They could be mistakenly treated as file lru. So further anon * test is needed. */ if (!folio_is_file_lru(folio) || (folio_test_anon(folio) && !folio_test_swapbacked(folio))) { *dirty = false; *writeback = false; return; } /* By default assume that the folio flags are accurate */ *dirty = folio_test_dirty(folio); *writeback = folio_test_writeback(folio); /* Verify dirty/writeback state if the filesystem supports it */ if (!folio_test_private(folio)) return; mapping = folio_mapping(folio); if (mapping && mapping->a_ops->is_dirty_writeback) mapping->a_ops->is_dirty_writeback(folio, dirty, writeback); } struct folio *alloc_migrate_folio(struct folio *src, unsigned long private) { struct folio *dst; nodemask_t *allowed_mask; struct migration_target_control *mtc; mtc = (struct migration_target_control *)private; allowed_mask = mtc->nmask; /* * make sure we allocate from the target node first also trying to * demote or reclaim pages from the target node via kswapd if we are * low on free memory on target node. If we don't do this and if * we have free memory on the slower(lower) memtier, we would start * allocating pages from slower(lower) memory tiers without even forcing * a demotion of cold pages from the target memtier. This can result * in the kernel placing hot pages in slower(lower) memory tiers. */ mtc->nmask = NULL; mtc->gfp_mask |= __GFP_THISNODE; dst = alloc_migration_target(src, (unsigned long)mtc); if (dst) return dst; mtc->gfp_mask &= ~__GFP_THISNODE; mtc->nmask = allowed_mask; return alloc_migration_target(src, (unsigned long)mtc); } /* * Take folios on @demote_folios and attempt to demote them to another node. * Folios which are not demoted are left on @demote_folios. */ static unsigned int demote_folio_list(struct list_head *demote_folios, struct pglist_data *pgdat) { int target_nid = next_demotion_node(pgdat->node_id); unsigned int nr_succeeded; nodemask_t allowed_mask; struct migration_target_control mtc = { /* * Allocate from 'node', or fail quickly and quietly. * When this happens, 'page' will likely just be discarded * instead of migrated. 
*/ .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | __GFP_NOWARN | __GFP_NOMEMALLOC | GFP_NOWAIT, .nid = target_nid, .nmask = &allowed_mask, .reason = MR_DEMOTION, }; if (list_empty(demote_folios)) return 0; if (target_nid == NUMA_NO_NODE) return 0; node_get_allowed_targets(pgdat, &allowed_mask); /* Demotion ignores all cpuset and mempolicy settings */ migrate_pages(demote_folios, alloc_migrate_folio, NULL, (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded); return nr_succeeded; } static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask) { if (gfp_mask & __GFP_FS) return true; if (!folio_test_swapcache(folio) || !(gfp_mask & __GFP_IO)) return false; /* * We can "enter_fs" for swap-cache with only __GFP_IO * providing this isn't SWP_FS_OPS. * ->flags can be updated non-atomicially (scan_swap_map_slots), * but that will never affect SWP_FS_OPS, so the data_race * is safe. */ return !data_race(folio_swap_flags(folio) & SWP_FS_OPS); } /* * shrink_folio_list() returns the number of reclaimed pages */ static unsigned int shrink_folio_list(struct list_head *folio_list, struct pglist_data *pgdat, struct scan_control *sc, struct reclaim_stat *stat, bool ignore_references) { struct folio_batch free_folios; LIST_HEAD(ret_folios); LIST_HEAD(demote_folios); unsigned int nr_reclaimed = 0; unsigned int pgactivate = 0; bool do_demote_pass; struct swap_iocb *plug = NULL; folio_batch_init(&free_folios); memset(stat, 0, sizeof(*stat)); cond_resched(); do_demote_pass = can_demote(pgdat->node_id, sc); retry: while (!list_empty(folio_list)) { struct address_space *mapping; struct folio *folio; enum folio_references references = FOLIOREF_RECLAIM; bool dirty, writeback; unsigned int nr_pages; cond_resched(); folio = lru_to_folio(folio_list); list_del(&folio->lru); if (!folio_trylock(folio)) goto keep; VM_BUG_ON_FOLIO(folio_test_active(folio), folio); nr_pages = folio_nr_pages(folio); /* Account the number of base pages */ sc->nr_scanned += nr_pages; if (unlikely(!folio_evictable(folio))) goto activate_locked; if (!sc->may_unmap && folio_mapped(folio)) goto keep_locked; /* folio_update_gen() tried to promote this page? */ if (lru_gen_enabled() && !ignore_references && folio_mapped(folio) && folio_test_referenced(folio)) goto keep_locked; /* * The number of dirty pages determines if a node is marked * reclaim_congested. kswapd will stall and start writing * folios if the tail of the LRU is all dirty unqueued folios. */ folio_check_dirty_writeback(folio, &dirty, &writeback); if (dirty || writeback) stat->nr_dirty += nr_pages; if (dirty && !writeback) stat->nr_unqueued_dirty += nr_pages; /* * Treat this folio as congested if folios are cycling * through the LRU so quickly that the folios marked * for immediate reclaim are making it to the end of * the LRU a second time. */ if (writeback && folio_test_reclaim(folio)) stat->nr_congested += nr_pages; /* * If a folio at the tail of the LRU is under writeback, there * are three cases to consider. * * 1) If reclaim is encountering an excessive number * of folios under writeback and this folio has both * the writeback and reclaim flags set, then it * indicates that folios are being queued for I/O but * are being recycled through the LRU before the I/O * can complete. 
Waiting on the folio itself risks an * indefinite stall if it is impossible to writeback * the folio due to I/O error or disconnected storage * so instead note that the LRU is being scanned too * quickly and the caller can stall after the folio * list has been processed. * * 2) Global or new memcg reclaim encounters a folio that is * not marked for immediate reclaim, or the caller does not * have __GFP_FS (or __GFP_IO if it's simply going to swap, * not to fs). In this case mark the folio for immediate * reclaim and continue scanning. * * Require may_enter_fs() because we would wait on fs, which * may not have submitted I/O yet. And the loop driver might * enter reclaim, and deadlock if it waits on a folio for * which it is needed to do the write (loop masks off * __GFP_IO|__GFP_FS for this reason); but more thought * would probably show more reasons. * * 3) Legacy memcg encounters a folio that already has the * reclaim flag set. memcg does not have any dirty folio * throttling so we could easily OOM just because too many * folios are in writeback and there is nothing else to * reclaim. Wait for the writeback to complete. * * In cases 1) and 2) we activate the folios to get them out of * the way while we continue scanning for clean folios on the * inactive list and refilling from the active list. The * observation here is that waiting for disk writes is more * expensive than potentially causing reloads down the line. * Since they're marked for immediate reclaim, they won't put * memory pressure on the cache working set any longer than it * takes to write them to disk. */ if (folio_test_writeback(folio)) { /* Case 1 above */ if (current_is_kswapd() && folio_test_reclaim(folio) && test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { stat->nr_immediate += nr_pages; goto activate_locked; /* Case 2 above */ } else if (writeback_throttling_sane(sc) || !folio_test_reclaim(folio) || !may_enter_fs(folio, sc->gfp_mask)) { /* * This is slightly racy - * folio_end_writeback() might have * just cleared the reclaim flag, then * setting the reclaim flag here ends up * interpreted as the readahead flag - but * that does not matter enough to care. * What we do want is for this folio to * have the reclaim flag set next time * memcg reclaim reaches the tests above, * so it will then wait for writeback to * avoid OOM; and it's also appropriate * in global reclaim. */ folio_set_reclaim(folio); stat->nr_writeback += nr_pages; goto activate_locked; /* Case 3 above */ } else { folio_unlock(folio); folio_wait_writeback(folio); /* then go back and try same folio again */ list_add_tail(&folio->lru, folio_list); continue; } } if (!ignore_references) references = folio_check_references(folio, sc); switch (references) { case FOLIOREF_ACTIVATE: goto activate_locked; case FOLIOREF_KEEP: stat->nr_ref_keep += nr_pages; goto keep_locked; case FOLIOREF_RECLAIM: case FOLIOREF_RECLAIM_CLEAN: ; /* try to reclaim the folio below */ } /* * Before reclaiming the folio, try to relocate * its contents to another node. */ if (do_demote_pass && (thp_migration_supported() || !folio_test_large(folio))) { list_add(&folio->lru, &demote_folios); folio_unlock(folio); continue; } /* * Anonymous process memory has backing store? * Try to allocate it some swap space here. 
* Lazyfree folio could be freed directly */ if (folio_test_anon(folio) && folio_test_swapbacked(folio)) { if (!folio_test_swapcache(folio)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; if (folio_maybe_dma_pinned(folio)) goto keep_locked; if (folio_test_large(folio)) { /* cannot split folio, skip it */ if (!can_split_folio(folio, NULL)) goto activate_locked; /* * Split partially mapped folios right away. * We can free the unmapped pages without IO. */ if (data_race(!list_empty(&folio->_deferred_list)) && split_folio_to_list(folio, folio_list)) goto activate_locked; } if (!add_to_swap(folio)) { int __maybe_unused order = folio_order(folio); if (!folio_test_large(folio)) goto activate_locked_split; /* Fallback to swap normal pages */ if (split_folio_to_list(folio, folio_list)) goto activate_locked; #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (nr_pages >= HPAGE_PMD_NR) { count_memcg_folio_events(folio, THP_SWPOUT_FALLBACK, 1); count_vm_event(THP_SWPOUT_FALLBACK); } count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK); #endif if (!add_to_swap(folio)) goto activate_locked_split; } } } else if (folio_test_swapbacked(folio) && folio_test_large(folio)) { /* Split shmem folio */ if (split_folio_to_list(folio, folio_list)) goto keep_locked; } /* * If the folio was split above, the tail pages will make * their own pass through this function and be accounted * then. */ if ((nr_pages > 1) && !folio_test_large(folio)) { sc->nr_scanned -= (nr_pages - 1); nr_pages = 1; } /* * The folio is mapped into the page tables of one or more * processes. Try to unmap it here. */ if (folio_mapped(folio)) { enum ttu_flags flags = TTU_BATCH_FLUSH; bool was_swapbacked = folio_test_swapbacked(folio); if (folio_test_pmd_mappable(folio)) flags |= TTU_SPLIT_HUGE_PMD; /* * Without TTU_SYNC, try_to_unmap will only begin to * hold PTL from the first present PTE within a large * folio. Some initial PTEs might be skipped due to * races with parallel PTE writes in which PTEs can be * cleared temporarily before being written new present * values. This will lead to a large folio is still * mapped while some subpages have been partially * unmapped after try_to_unmap; TTU_SYNC helps * try_to_unmap acquire PTL from the first PTE, * eliminating the influence of temporary PTE values. */ if (folio_test_large(folio)) flags |= TTU_SYNC; try_to_unmap(folio, flags); if (folio_mapped(folio)) { stat->nr_unmap_fail += nr_pages; if (!was_swapbacked && folio_test_swapbacked(folio)) stat->nr_lazyfree_fail += nr_pages; goto activate_locked; } } /* * Folio is unmapped now so it cannot be newly pinned anymore. * No point in trying to reclaim folio if it is pinned. * Furthermore we don't want to reclaim underlying fs metadata * if the folio is pinned and thus potentially modified by the * pinning process as that may upset the filesystem. */ if (folio_maybe_dma_pinned(folio)) goto activate_locked; mapping = folio_mapping(folio); if (folio_test_dirty(folio)) { /* * Only kswapd can writeback filesystem folios * to avoid risk of stack overflow. But avoid * injecting inefficient single-folio I/O into * flusher writeback as much as possible: only * write folios when we've encountered many * dirty folios, and when we've already scanned * the rest of the LRU for clean folios and see * the same dirty folios again (with the reclaim * flag set). */ if (folio_is_file_lru(folio) && (!current_is_kswapd() || !folio_test_reclaim(folio) || !test_bit(PGDAT_DIRTY, &pgdat->flags))) { /* * Immediately reclaim when written back. 
* Similar in principle to folio_deactivate() * except we already have the folio isolated * and know it's dirty */ node_stat_mod_folio(folio, NR_VMSCAN_IMMEDIATE, nr_pages); folio_set_reclaim(folio); goto activate_locked; } if (references == FOLIOREF_RECLAIM_CLEAN) goto keep_locked; if (!may_enter_fs(folio, sc->gfp_mask)) goto keep_locked; if (!sc->may_writepage) goto keep_locked; /* * Folio is dirty. Flush the TLB if a writable entry * potentially exists to avoid CPU writes after I/O * starts and then write it out here. */ try_to_unmap_flush_dirty(); switch (pageout(folio, mapping, &plug)) { case PAGE_KEEP: goto keep_locked; case PAGE_ACTIVATE: goto activate_locked; case PAGE_SUCCESS: stat->nr_pageout += nr_pages; if (folio_test_writeback(folio)) goto keep; if (folio_test_dirty(folio)) goto keep; /* * A synchronous write - probably a ramdisk. Go * ahead and try to reclaim the folio. */ if (!folio_trylock(folio)) goto keep; if (folio_test_dirty(folio) || folio_test_writeback(folio)) goto keep_locked; mapping = folio_mapping(folio); fallthrough; case PAGE_CLEAN: ; /* try to free the folio below */ } } /* * If the folio has buffers, try to free the buffer * mappings associated with this folio. If we succeed * we try to free the folio as well. * * We do this even if the folio is dirty. * filemap_release_folio() does not perform I/O, but it * is possible for a folio to have the dirty flag set, * but it is actually clean (all its buffers are clean). * This happens if the buffers were written out directly, * with submit_bh(). ext3 will do this, as well as * the blockdev mapping. filemap_release_folio() will * discover that cleanness and will drop the buffers * and mark the folio clean - it can be freed. * * Rarely, folios can have buffers and no ->mapping. * These are the folios which were not successfully * invalidated in truncate_cleanup_folio(). We try to * drop those buffers here and if that worked, and the * folio is no longer mapped into process address space * (refcount == 1) it can be freed. Otherwise, leave * the folio on the LRU so it is swappable. */ if (folio_needs_release(folio)) { if (!filemap_release_folio(folio, sc->gfp_mask)) goto activate_locked; if (!mapping && folio_ref_count(folio) == 1) { folio_unlock(folio); if (folio_put_testzero(folio)) goto free_it; else { /* * rare race with speculative reference. * the speculative reference will free * this folio shortly, so we may * increment nr_reclaimed here (and * leave it off the LRU). */ nr_reclaimed += nr_pages; continue; } } } if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) { /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; /* * The folio has only one reference left, which is * from the isolation. After the caller puts the * folio back on the lru and drops the reference, the * folio will be freed anyway. It doesn't matter * which lru it goes on. So we don't bother checking * the dirty flag here. */ count_vm_events(PGLAZYFREED, nr_pages); count_memcg_folio_events(folio, PGLAZYFREED, nr_pages); } else if (!mapping || !__remove_mapping(mapping, folio, true, sc->target_mem_cgroup)) goto keep_locked; folio_unlock(folio); free_it: /* * Folio may get swapped out as a whole, need to account * all pages in it. 
*/ nr_reclaimed += nr_pages; folio_undo_large_rmappable(folio); if (folio_batch_add(&free_folios, folio) == 0) { mem_cgroup_uncharge_folios(&free_folios); try_to_unmap_flush(); free_unref_folios(&free_folios); } continue; activate_locked_split: /* * The tail pages that are failed to add into swap cache * reach here. Fixup nr_scanned and nr_pages. */ if (nr_pages > 1) { sc->nr_scanned -= (nr_pages - 1); nr_pages = 1; } activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (folio_test_swapcache(folio) && (mem_cgroup_swap_full(folio) || folio_test_mlocked(folio))) folio_free_swap(folio); VM_BUG_ON_FOLIO(folio_test_active(folio), folio); if (!folio_test_mlocked(folio)) { int type = folio_is_file_lru(folio); folio_set_active(folio); stat->nr_activate[type] += nr_pages; count_memcg_folio_events(folio, PGACTIVATE, nr_pages); } keep_locked: folio_unlock(folio); keep: list_add(&folio->lru, &ret_folios); VM_BUG_ON_FOLIO(folio_test_lru(folio) || folio_test_unevictable(folio), folio); } /* 'folio_list' is always empty here */ /* Migrate folios selected for demotion */ nr_reclaimed += demote_folio_list(&demote_folios, pgdat); /* Folios that could not be demoted are still in @demote_folios */ if (!list_empty(&demote_folios)) { /* Folios which weren't demoted go back on @folio_list */ list_splice_init(&demote_folios, folio_list); /* * goto retry to reclaim the undemoted folios in folio_list if * desired. * * Reclaiming directly from top tier nodes is not often desired * due to it breaking the LRU ordering: in general memory * should be reclaimed from lower tier nodes and demoted from * top tier nodes. * * However, disabling reclaim from top tier nodes entirely * would cause ooms in edge scenarios where lower tier memory * is unreclaimable for whatever reason, eg memory being * mlocked or too hot to reclaim. We can disable reclaim * from top tier nodes in proactive reclaim though as that is * not real memory pressure. */ if (!sc->proactive) { do_demote_pass = false; goto retry; } } pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; mem_cgroup_uncharge_folios(&free_folios); try_to_unmap_flush(); free_unref_folios(&free_folios); list_splice(&ret_folios, folio_list); count_vm_events(PGACTIVATE, pgactivate); if (plug) swap_write_unplug(plug); return nr_reclaimed; } unsigned int reclaim_clean_pages_from_list(struct zone *zone, struct list_head *folio_list) { struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_unmap = 1, }; struct reclaim_stat stat; unsigned int nr_reclaimed; struct folio *folio, *next; LIST_HEAD(clean_folios); unsigned int noreclaim_flag; list_for_each_entry_safe(folio, next, folio_list, lru) { if (!folio_test_hugetlb(folio) && folio_is_file_lru(folio) && !folio_test_dirty(folio) && !__folio_test_movable(folio) && !folio_test_unevictable(folio)) { folio_clear_active(folio); list_move(&folio->lru, &clean_folios); } } /* * We should be safe here since we are only dealing with file pages and * we are not kswapd and therefore cannot write dirty file pages. But * call memalloc_noreclaim_save() anyway, just in case these conditions * change in the future. 
*/ noreclaim_flag = memalloc_noreclaim_save(); nr_reclaimed = shrink_folio_list(&clean_folios, zone->zone_pgdat, &sc, &stat, true); memalloc_noreclaim_restore(noreclaim_flag); list_splice(&clean_folios, folio_list); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -(long)nr_reclaimed); /* * Since lazyfree pages are isolated from file LRU from the beginning, * they will rotate back to anonymous LRU in the end if it failed to * discard so isolated count will be mismatched. * Compensate the isolated count for both LRU lists. */ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, stat.nr_lazyfree_fail); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -(long)stat.nr_lazyfree_fail); return nr_reclaimed; } /* * Update LRU sizes after isolating pages. The LRU size updates must * be complete before mem_cgroup_update_lru_size due to a sanity check. */ static __always_inline void update_lru_sizes(struct lruvec *lruvec, enum lru_list lru, unsigned long *nr_zone_taken) { int zid; for (zid = 0; zid < MAX_NR_ZONES; zid++) { if (!nr_zone_taken[zid]) continue; update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]); } } #ifdef CONFIG_CMA /* * It is waste of effort to scan and reclaim CMA pages if it is not available * for current allocation context. Kswapd can not be enrolled as it can not * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL */ static bool skip_cma(struct folio *folio, struct scan_control *sc) { return !current_is_kswapd() && gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE && folio_migratetype(folio) == MIGRATE_CMA; } #else static bool skip_cma(struct folio *folio, struct scan_control *sc) { return false; } #endif /* * Isolating page from the lruvec to fill in @dst list by nr_to_scan times. * * lruvec->lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages * and working on them outside the LRU lock. * * For pagecache intensive workloads, this function is the hottest * spot in the kernel (apart from copy_*_user functions). * * Lru_lock must be held before calling this function. * * @nr_to_scan: The number of eligible pages to look through on the list. * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, enum lru_list lru) { struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 }; unsigned long nr_skipped[MAX_NR_ZONES] = { 0, }; unsigned long skipped = 0; unsigned long scan, total_scan, nr_pages; LIST_HEAD(folios_skipped); total_scan = 0; scan = 0; while (scan < nr_to_scan && !list_empty(src)) { struct list_head *move_to = src; struct folio *folio; folio = lru_to_folio(src); prefetchw_prev_lru_folio(folio, src, flags); nr_pages = folio_nr_pages(folio); total_scan += nr_pages; if (folio_zonenum(folio) > sc->reclaim_idx || skip_cma(folio, sc)) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; goto move; } /* * Do not count skipped folios because that makes the function * return with no isolated folios if the LRU mostly contains * ineligible folios. 
This causes the VM to not reclaim any * folios, triggering a premature OOM. * Account all pages in a folio. */ scan += nr_pages; if (!folio_test_lru(folio)) goto move; if (!sc->may_unmap && folio_mapped(folio)) goto move; /* * Be careful not to clear the lru flag until after we're * sure the folio is not being freed elsewhere -- the * folio release code relies on it. */ if (unlikely(!folio_try_get(folio))) goto move; if (!folio_test_clear_lru(folio)) { /* Another thread is already isolating this folio */ folio_put(folio); goto move; } nr_taken += nr_pages; nr_zone_taken[folio_zonenum(folio)] += nr_pages; move_to = dst; move: list_move(&folio->lru, move_to); } /* * Splice any skipped folios to the start of the LRU list. Note that * this disrupts the LRU order when reclaiming for lower zones but * we cannot splice to the tail. If we did then the SWAP_CLUSTER_MAX * scanning would soon rescan the same folios to skip and waste lots * of cpu cycles. */ if (!list_empty(&folios_skipped)) { int zid; list_splice(&folios_skipped, src); for (zid = 0; zid < MAX_NR_ZONES; zid++) { if (!nr_skipped[zid]) continue; __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]); skipped += nr_skipped[zid]; } } *nr_scanned = total_scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, total_scan, skipped, nr_taken, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } /** * folio_isolate_lru() - Try to isolate a folio from its LRU list. * @folio: Folio to isolate from its LRU list. * * Isolate a @folio from an LRU list and adjust the vmstat statistic * corresponding to whatever LRU list the folio was on. * * The folio will have its LRU flag cleared. If it was found on the * active list, it will have the Active flag set. If it was found on the * unevictable list, it will have the Unevictable flag set. These flags * may need to be cleared by the caller before letting the page go. * * Context: * * (1) Must be called with an elevated refcount on the folio. This is a * fundamental difference from isolate_lru_folios() (which is called * without a stable reference). * (2) The lru_lock must not be held. * (3) Interrupts must be enabled. * * Return: true if the folio was removed from an LRU list. * false if the folio was not on an LRU list. */ bool folio_isolate_lru(struct folio *folio) { bool ret = false; VM_BUG_ON_FOLIO(!folio_ref_count(folio), folio); if (folio_test_clear_lru(folio)) { struct lruvec *lruvec; folio_get(folio); lruvec = folio_lruvec_lock_irq(folio); lruvec_del_folio(lruvec, folio); unlock_page_lruvec_irq(lruvec); ret = true; } return ret; } /* * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and * then get rescheduled. When there are massive number of tasks doing page * allocation, such sleeping direct reclaimers may keep piling up on each CPU, * the LRU list will go small and be scanned faster than necessary, leading to * unnecessary swapping, thrashing and OOM. 
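The throttling decision implemented just below in too_many_isolated() compares the number of already-isolated pages against the inactive list, with the threshold tightened for callers whose gfp mask allows IO/FS. A minimal userspace restatement of that comparison follows; the helper name and the page counts are illustrative, not from the kernel:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative restatement of the too_many_isolated() threshold. */
static bool too_many_isolated_sketch(unsigned long inactive,
				     unsigned long isolated,
				     bool gfp_has_io_fs)
{
	/*
	 * Callers that can enter IO/FS only get an eighth of the inactive
	 * list as headroom; GFP_NOIO/GFP_NOFS callers are allowed to
	 * isolate more so they cannot deadlock behind normal reclaimers.
	 */
	if (gfp_has_io_fs)
		inactive >>= 3;

	return isolated > inactive;
}

int main(void)
{
	/* 4096 inactive pages, 600 already isolated by other reclaimers. */
	printf("GFP_KERNEL caller throttled: %d\n",
	       too_many_isolated_sketch(4096, 600, true));	/* 600 > 512 -> 1 */
	printf("GFP_NOFS caller throttled:  %d\n",
	       too_many_isolated_sketch(4096, 600, false));	/* 600 > 4096 -> 0 */
	return 0;
}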
*/ static bool too_many_isolated(struct pglist_data *pgdat, int file, struct scan_control *sc) { unsigned long inactive, isolated; bool too_many; if (current_is_kswapd()) return false; if (!writeback_throttling_sane(sc)) return false; if (file) { inactive = node_page_state(pgdat, NR_INACTIVE_FILE); isolated = node_page_state(pgdat, NR_ISOLATED_FILE); } else { inactive = node_page_state(pgdat, NR_INACTIVE_ANON); isolated = node_page_state(pgdat, NR_ISOLATED_ANON); } /* * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they * won't get blocked by normal direct-reclaimers, forming a circular * deadlock. */ if (gfp_has_io_fs(sc->gfp_mask)) inactive >>= 3; too_many = isolated > inactive; /* Wake up tasks throttled due to too_many_isolated. */ if (!too_many) wake_throttle_isolated(pgdat); return too_many; } /* * move_folios_to_lru() moves folios from private @list to appropriate LRU list. * * Returns the number of pages moved to the given lruvec. */ static unsigned int move_folios_to_lru(struct lruvec *lruvec, struct list_head *list) { int nr_pages, nr_moved = 0; struct folio_batch free_folios; folio_batch_init(&free_folios); while (!list_empty(list)) { struct folio *folio = lru_to_folio(list); VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); list_del(&folio->lru); if (unlikely(!folio_evictable(folio))) { spin_unlock_irq(&lruvec->lru_lock); folio_putback_lru(folio); spin_lock_irq(&lruvec->lru_lock); continue; } /* * The folio_set_lru needs to be kept here for list integrity. * Otherwise: * #0 move_folios_to_lru #1 release_pages * if (!folio_put_testzero()) * if (folio_put_testzero()) * !lru //skip lru_lock * folio_set_lru() * list_add(&folio->lru,) * list_add(&folio->lru,) */ folio_set_lru(folio); if (unlikely(folio_put_testzero(folio))) { __folio_clear_lru_flags(folio); folio_undo_large_rmappable(folio); if (folio_batch_add(&free_folios, folio) == 0) { spin_unlock_irq(&lruvec->lru_lock); mem_cgroup_uncharge_folios(&free_folios); free_unref_folios(&free_folios); spin_lock_irq(&lruvec->lru_lock); } continue; } /* * All pages were isolated from the same lruvec (and isolation * inhibits memcg migration). */ VM_BUG_ON_FOLIO(!folio_matches_lruvec(folio, lruvec), folio); lruvec_add_folio(lruvec, folio); nr_pages = folio_nr_pages(folio); nr_moved += nr_pages; if (folio_test_active(folio)) workingset_age_nonresident(lruvec, nr_pages); } if (free_folios.nr) { spin_unlock_irq(&lruvec->lru_lock); mem_cgroup_uncharge_folios(&free_folios); free_unref_folios(&free_folios); spin_lock_irq(&lruvec->lru_lock); } return nr_moved; } /* * If a kernel thread (such as nfsd for loop-back mounts) services a backing * device by writing to the page cache it sets PF_LOCAL_THROTTLE. In this case * we should not throttle. Otherwise it is safe to do so. */ static int current_may_throttle(void) { return !(current->flags & PF_LOCAL_THROTTLE); } /* * shrink_inactive_list() is a helper for shrink_node(). It returns the number * of reclaimed pages */ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) { LIST_HEAD(folio_list); unsigned long nr_scanned; unsigned int nr_reclaimed = 0; unsigned long nr_taken; struct reclaim_stat stat; bool file = is_file_lru(lru); enum vm_event_item item; struct pglist_data *pgdat = lruvec_pgdat(lruvec); bool stalled = false; while (unlikely(too_many_isolated(pgdat, file, sc))) { if (stalled) return 0; /* wait a bit for the reclaimer. 
*/ stalled = true; reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) return SWAP_CLUSTER_MAX; } lru_add_drain(); spin_lock_irq(&lruvec->lru_lock); nr_taken = isolate_lru_folios(nr_to_scan, lruvec, &folio_list, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); item = PGSCAN_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_scanned); __count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned); __count_vm_events(PGSCAN_ANON + file, nr_scanned); spin_unlock_irq(&lruvec->lru_lock); if (nr_taken == 0) return 0; nr_reclaimed = shrink_folio_list(&folio_list, pgdat, sc, &stat, false); spin_lock_irq(&lruvec->lru_lock); move_folios_to_lru(lruvec, &folio_list); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); item = PGSTEAL_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); __count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed); __count_vm_events(PGSTEAL_ANON + file, nr_reclaimed); spin_unlock_irq(&lruvec->lru_lock); lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed); /* * If dirty folios are scanned that are not queued for IO, it * implies that flushers are not doing their job. This can * happen when memory pressure pushes dirty folios to the end of * the LRU before the dirty limits are breached and the dirty * data has expired. It can also happen when the proportion of * dirty folios grows not through writes but through memory * pressure reclaiming all the clean cache. And in some cases, * the flushers simply cannot keep up with the allocation * rate. Nudge the flusher threads in case they are asleep. */ if (stat.nr_unqueued_dirty == nr_taken) { wakeup_flusher_threads(WB_REASON_VMSCAN); /* * For cgroupv1 dirty throttling is achieved by waking up * the kernel flusher here and later waiting on folios * which are in writeback to finish (see shrink_folio_list()). * * Flusher may not be able to issue writeback quickly * enough for cgroupv1 writeback throttling to work * on a large system. */ if (!writeback_throttling_sane(sc)) reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); } sc->nr.dirty += stat.nr_dirty; sc->nr.congested += stat.nr_congested; sc->nr.unqueued_dirty += stat.nr_unqueued_dirty; sc->nr.writeback += stat.nr_writeback; sc->nr.immediate += stat.nr_immediate; sc->nr.taken += nr_taken; if (file) sc->nr.file_taken += nr_taken; trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, nr_scanned, nr_reclaimed, &stat, sc->priority, file); return nr_reclaimed; } /* * shrink_active_list() moves folios from the active LRU to the inactive LRU. * * We move them the other way if the folio is referenced by one or more * processes. * * If the folios are mostly unmapped, the processing is fast and it is * appropriate to hold lru_lock across the whole operation. But if * the folios are mapped, the processing is slow (folio_referenced()), so * we should drop lru_lock around each folio. It's impossible to balance * this, so instead we remove the folios from the LRU while processing them. * It is safe to rely on the active flag against the non-LRU folios in here * because nobody will play with that bit on a non-LRU folio. * * The downside is that we have to touch folio->_refcount against each folio. * But we had to alter folio->flags anyway. 
*/ static void shrink_active_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; unsigned long vm_flags; LIST_HEAD(l_hold); /* The folios which were snipped off */ LIST_HEAD(l_active); LIST_HEAD(l_inactive); unsigned nr_deactivate, nr_activate; unsigned nr_rotated = 0; bool file = is_file_lru(lru); struct pglist_data *pgdat = lruvec_pgdat(lruvec); lru_add_drain(); spin_lock_irq(&lruvec->lru_lock); nr_taken = isolate_lru_folios(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); if (!cgroup_reclaim(sc)) __count_vm_events(PGREFILL, nr_scanned); __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); spin_unlock_irq(&lruvec->lru_lock); while (!list_empty(&l_hold)) { struct folio *folio; cond_resched(); folio = lru_to_folio(&l_hold); list_del(&folio->lru); if (unlikely(!folio_evictable(folio))) { folio_putback_lru(folio); continue; } if (unlikely(buffer_heads_over_limit)) { if (folio_needs_release(folio) && folio_trylock(folio)) { filemap_release_folio(folio, 0); folio_unlock(folio); } } /* Referenced or rmap lock contention: rotate */ if (folio_referenced(folio, 0, sc->target_mem_cgroup, &vm_flags) != 0) { /* * Identify referenced, file-backed active folios and * give them one more trip around the active list. So * that executable code get better chances to stay in * memory under moderate memory pressure. Anon folios * are not likely to be evicted by use-once streaming * IO, plus JVM can create lots of anon VM_EXEC folios, * so we ignore them here. */ if ((vm_flags & VM_EXEC) && folio_is_file_lru(folio)) { nr_rotated += folio_nr_pages(folio); list_add(&folio->lru, &l_active); continue; } } folio_clear_active(folio); /* we are de-activating */ folio_set_workingset(folio); list_add(&folio->lru, &l_inactive); } /* * Move folios back to the lru list. 
*/ spin_lock_irq(&lruvec->lru_lock); nr_activate = move_folios_to_lru(lruvec, &l_active); nr_deactivate = move_folios_to_lru(lruvec, &l_inactive); __count_vm_events(PGDEACTIVATE, nr_deactivate); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&lruvec->lru_lock); if (nr_rotated) lru_note_cost(lruvec, file, 0, nr_rotated); trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate, nr_deactivate, nr_rotated, sc->priority, file); } static unsigned int reclaim_folio_list(struct list_head *folio_list, struct pglist_data *pgdat) { struct reclaim_stat dummy_stat; unsigned int nr_reclaimed; struct folio *folio; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .may_writepage = 1, .may_unmap = 1, .may_swap = 1, .no_demotion = 1, }; nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, true); while (!list_empty(folio_list)) { folio = lru_to_folio(folio_list); list_del(&folio->lru); folio_putback_lru(folio); } return nr_reclaimed; } unsigned long reclaim_pages(struct list_head *folio_list) { int nid; unsigned int nr_reclaimed = 0; LIST_HEAD(node_folio_list); unsigned int noreclaim_flag; if (list_empty(folio_list)) return nr_reclaimed; noreclaim_flag = memalloc_noreclaim_save(); nid = folio_nid(lru_to_folio(folio_list)); do { struct folio *folio = lru_to_folio(folio_list); if (nid == folio_nid(folio)) { folio_clear_active(folio); list_move(&folio->lru, &node_folio_list); continue; } nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); nid = folio_nid(lru_to_folio(folio_list)); } while (!list_empty(folio_list)); nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); memalloc_noreclaim_restore(noreclaim_flag); return nr_reclaimed; } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) { if (is_active_lru(lru)) { if (sc->may_deactivate & (1 << is_file_lru(lru))) shrink_active_list(nr_to_scan, lruvec, sc, lru); else sc->skipped_deactivate = 1; return 0; } return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); } /* * The inactive anon list should be small enough that the VM never has * to do too much work. * * The inactive file list should be small enough to leave most memory * to the established workingset on the scan-resistant active list, * but large enough to avoid thrashing the aggregate readahead window. * * Both inactive lists should also be large enough that each inactive * folio has a chance to be referenced again before it is reclaimed. * * If that fails and refaulting is observed, the inactive list grows. * * The inactive_ratio is the target ratio of ACTIVE to INACTIVE folios * on this LRU, maintained by the pageout code. An inactive_ratio * of 3 means 3:1 or 25% of the folios are kept on the inactive list. 
* * total target max * memory ratio inactive * ------------------------------------- * 10MB 1 5MB * 100MB 1 50MB * 1GB 3 250MB * 10GB 10 0.9GB * 100GB 31 3GB * 1TB 101 10GB * 10TB 320 32GB */ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) { enum lru_list active_lru = inactive_lru + LRU_ACTIVE; unsigned long inactive, active; unsigned long inactive_ratio; unsigned long gb; inactive = lruvec_page_state(lruvec, NR_LRU_BASE + inactive_lru); active = lruvec_page_state(lruvec, NR_LRU_BASE + active_lru); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) inactive_ratio = int_sqrt(10 * gb); else inactive_ratio = 1; return inactive * inactive_ratio < active; } enum scan_balance { SCAN_EQUAL, SCAN_FRACT, SCAN_ANON, SCAN_FILE, }; static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc) { unsigned long file; struct lruvec *target_lruvec; if (lru_gen_enabled()) return; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); /* * Flush the memory cgroup stats, so that we read accurate per-memcg * lruvec stats for heuristics. */ mem_cgroup_flush_stats(sc->target_mem_cgroup); /* * Determine the scan balance between anon and file LRUs. */ spin_lock_irq(&target_lruvec->lru_lock); sc->anon_cost = target_lruvec->anon_cost; sc->file_cost = target_lruvec->file_cost; spin_unlock_irq(&target_lruvec->lru_lock); /* * Target desirable inactive:active list ratios for the anon * and file LRU lists. */ if (!sc->force_deactivate) { unsigned long refaults; /* * When refaults are being observed, it means a new * workingset is being established. Deactivate to get * rid of any stale active pages quickly. */ refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); if (refaults != target_lruvec->refaults[WORKINGSET_ANON] || inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) sc->may_deactivate |= DEACTIVATE_ANON; else sc->may_deactivate &= ~DEACTIVATE_ANON; refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE); if (refaults != target_lruvec->refaults[WORKINGSET_FILE] || inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) sc->may_deactivate |= DEACTIVATE_FILE; else sc->may_deactivate &= ~DEACTIVATE_FILE; } else sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; /* * If we have plenty of inactive file pages that aren't * thrashing, try to reclaim those first before touching * anonymous pages. */ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE) && !sc->no_cache_trim_mode) sc->cache_trim_mode = 1; else sc->cache_trim_mode = 0; /* * Prevent the reclaimer from falling into the cache trap: as * cache pages start out inactive, every cache fault will tip * the scan balance towards the file LRU. And as the file LRU * shrinks, so does the window for rotation from references. * This means we have a runaway feedback loop where a tiny * thrashing file LRU becomes infinitely more attractive than * anon pages. Try to detect this based on file LRU size. 
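As a cross-check of the inactive_ratio table given above inactive_is_low(), the ratio is int_sqrt(10 * gb) once the combined LRU size exceeds 1GB, and 1 below that. A small standalone sketch reproducing the table; the naive isqrt() here is only a stand-in for the kernel's int_sqrt():

#include <stdio.h>

/* Naive integer square root, standing in for the kernel's int_sqrt(). */
static unsigned long isqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	/* Total (inactive + active) LRU size in gigabytes. */
	unsigned long sizes_gb[] = { 1, 10, 100, 1024, 10240 };

	for (unsigned int i = 0; i < sizeof(sizes_gb) / sizeof(sizes_gb[0]); i++) {
		unsigned long gb = sizes_gb[i];
		unsigned long ratio = gb ? isqrt(10 * gb) : 1;

		/* 1GB -> 3, 10GB -> 10, 100GB -> 31, 1TB -> 101, 10TB -> 320 */
		printf("%6luGB total -> inactive_ratio %lu\n", gb, ratio);
	}
	return 0;
}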
*/ if (!cgroup_reclaim(sc)) { unsigned long total_high_wmark = 0; unsigned long free, anon; int z; free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); file = node_page_state(pgdat, NR_ACTIVE_FILE) + node_page_state(pgdat, NR_INACTIVE_FILE); for (z = 0; z < MAX_NR_ZONES; z++) { struct zone *zone = &pgdat->node_zones[z]; if (!managed_zone(zone)) continue; total_high_wmark += high_wmark_pages(zone); } /* * Consider anon: if that's low too, this isn't a * runaway file reclaim problem, but rather just * extreme pressure. Reclaim as per usual then. */ anon = node_page_state(pgdat, NR_INACTIVE_ANON); sc->file_is_tiny = file + free <= total_high_wmark && !(sc->may_deactivate & DEACTIVATE_ANON) && anon >> sc->priority; } } /* * Determine how aggressively the anon and file LRU lists should be * scanned. * * nr[0] = anon inactive folios to scan; nr[1] = anon active folios to scan * nr[2] = file inactive folios to scan; nr[3] = file active folios to scan */ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup *memcg = lruvec_memcg(lruvec); unsigned long anon_cost, file_cost, total_cost; int swappiness = sc_swappiness(sc, memcg); u64 fraction[ANON_AND_FILE]; u64 denominator = 0; /* gcc */ enum scan_balance scan_balance; unsigned long ap, fp; enum lru_list lru; /* If we have no swap space, do not bother scanning anon folios. */ if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) { scan_balance = SCAN_FILE; goto out; } /* * Global reclaim will swap to prevent OOM even with no * swappiness, but memcg users want to use this knob to * disable swapping for individual groups completely when * using the memory controller's swap limit feature would be * too expensive. */ if (cgroup_reclaim(sc) && !swappiness) { scan_balance = SCAN_FILE; goto out; } /* * Do not apply any pressure balancing cleverness when the * system is close to OOM, scan both anon and file equally * (unless the swappiness setting disagrees with swapping). */ if (!sc->priority && swappiness) { scan_balance = SCAN_EQUAL; goto out; } /* * If the system is almost out of file pages, force-scan anon. */ if (sc->file_is_tiny) { scan_balance = SCAN_ANON; goto out; } /* * If there is enough inactive page cache, we do not reclaim * anything from the anonymous working right now. */ if (sc->cache_trim_mode) { scan_balance = SCAN_FILE; goto out; } scan_balance = SCAN_FRACT; /* * Calculate the pressure balance between anon and file pages. * * The amount of pressure we put on each LRU is inversely * proportional to the cost of reclaiming each list, as * determined by the share of pages that are refaulting, times * the relative IO cost of bringing back a swapped out * anonymous page vs reloading a filesystem page (swappiness). * * Although we limit that influence to ensure no list gets * left behind completely: at least a third of the pressure is * applied, before swappiness. * * With swappiness at 100, anon and file have equal IO cost. 
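Before the implementation that follows, a minimal userspace restatement of this pressure-balance arithmetic; the helper name and the cost figures are illustrative, and MAX_SWAPPINESS mirrors the kernel constant (200):

#include <stdio.h>

#define MAX_SWAPPINESS 200	/* mirrors the kernel constant; 100 means equal IO cost */

/*
 * Illustrative restatement of the SCAN_FRACT arithmetic in get_scan_count():
 * pressure on each LRU is inversely proportional to its reclaim cost, and
 * the "+ total" term keeps either list from being starved entirely (at
 * least a third of the pressure, as the comment above notes).
 */
static void scan_fraction_sketch(unsigned long anon_cost, unsigned long file_cost,
				 int swappiness)
{
	unsigned long total = anon_cost + file_cost;
	unsigned long acost = total + anon_cost;
	unsigned long fcost = total + file_cost;
	unsigned long ap, fp;

	total = acost + fcost;
	ap = swappiness * (total + 1) / (acost + 1);
	fp = (MAX_SWAPPINESS - swappiness) * (total + 1) / (fcost + 1);

	printf("anon_cost=%lu file_cost=%lu swappiness=%d -> anon %lu%%, file %lu%%\n",
	       anon_cost, file_cost, swappiness,
	       100 * ap / (ap + fp), 100 * fp / (ap + fp));
}

int main(void)
{
	scan_fraction_sketch(1000, 1000, 100);	/* equal cost, default-ish swappiness */
	scan_fraction_sketch(4000, 1000, 100);	/* anon refaulting heavily -> less anon pressure */
	scan_fraction_sketch(1000, 1000, 20);	/* low swappiness favours file reclaim */
	return 0;
}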
*/ total_cost = sc->anon_cost + sc->file_cost; anon_cost = total_cost + sc->anon_cost; file_cost = total_cost + sc->file_cost; total_cost = anon_cost + file_cost; ap = swappiness * (total_cost + 1); ap /= anon_cost + 1; fp = (MAX_SWAPPINESS - swappiness) * (total_cost + 1); fp /= file_cost + 1; fraction[0] = ap; fraction[1] = fp; denominator = ap + fp; out: for_each_evictable_lru(lru) { bool file = is_file_lru(lru); unsigned long lruvec_size; unsigned long low, min; unsigned long scan; lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); mem_cgroup_protection(sc->target_mem_cgroup, memcg, &min, &low); if (min || low) { /* * Scale a cgroup's reclaim pressure by proportioning * its current usage to its memory.low or memory.min * setting. * * This is important, as otherwise scanning aggression * becomes extremely binary -- from nothing as we * approach the memory protection threshold, to totally * nominal as we exceed it. This results in requiring * setting extremely liberal protection thresholds. It * also means we simply get no protection at all if we * set it too low, which is not ideal. * * If there is any protection in place, we reduce scan * pressure by how much of the total memory used is * within protection thresholds. * * There is one special case: in the first reclaim pass, * we skip over all groups that are within their low * protection. If that fails to reclaim enough pages to * satisfy the reclaim goal, we come back and override * the best-effort low protection. However, we still * ideally want to honor how well-behaved groups are in * that case instead of simply punishing them all * equally. As such, we reclaim them based on how much * memory they are using, reducing the scan pressure * again by how much of the total memory used is under * hard protection. */ unsigned long cgroup_size = mem_cgroup_size(memcg); unsigned long protection; /* memory.low scaling, make sure we retry before OOM */ if (!sc->memcg_low_reclaim && low > min) { protection = low; sc->memcg_low_skipped = 1; } else { protection = min; } /* Avoid TOCTOU with earlier protection check */ cgroup_size = max(cgroup_size, protection); scan = lruvec_size - lruvec_size * protection / (cgroup_size + 1); /* * Minimally target SWAP_CLUSTER_MAX pages to keep * reclaim moving forwards, avoiding decrementing * sc->priority further than desirable. */ scan = max(scan, SWAP_CLUSTER_MAX); } else { scan = lruvec_size; } scan >>= sc->priority; /* * If the cgroup's already been deleted, make sure to * scrape out the remaining cache. */ if (!scan && !mem_cgroup_online(memcg)) scan = min(lruvec_size, SWAP_CLUSTER_MAX); switch (scan_balance) { case SCAN_EQUAL: /* Scan lists relative to size */ break; case SCAN_FRACT: /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. * Make sure we don't miss the last page on * the offlined memory cgroups because of a * round-off error. */ scan = mem_cgroup_online(memcg) ? div64_u64(scan * fraction[file], denominator) : DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: case SCAN_ANON: /* Scan one type exclusively */ if ((scan_balance == SCAN_FILE) != file) scan = 0; break; default: /* Look ma, no brain */ BUG(); } nr[lru] = scan; } } /* * Anonymous LRU management is a waste if there is * ultimately no way to reclaim the memory. 
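Going back to the memory.low/memory.min handling earlier in get_scan_count(): scan pressure is reduced by the share of the cgroup's usage that sits under its protection threshold. A minimal sketch with made-up page counts; the helper name is hypothetical and SWAP_CLUSTER_MAX is the kernel's minimum reclaim batch (32):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL	/* kernel constant: minimum reclaim batch */

/*
 * Illustrative restatement of the proportional memory.low/min scaling:
 * the more of the cgroup's usage is protected, the less of the lruvec
 * is scanned, down to a SWAP_CLUSTER_MAX floor before the priority shift.
 */
static unsigned long protected_scan_sketch(unsigned long lruvec_size,
					   unsigned long protection,
					   unsigned long cgroup_size,
					   int priority)
{
	unsigned long scan;

	if (cgroup_size < protection)		/* avoid TOCTOU, as in the kernel */
		cgroup_size = protection;

	scan = lruvec_size - lruvec_size * protection / (cgroup_size + 1);
	if (scan < SWAP_CLUSTER_MAX)
		scan = SWAP_CLUSTER_MAX;

	return scan >> priority;
}

int main(void)
{
	/* 1GB of LRU pages, memory.low covering half the cgroup's usage. */
	printf("half protected:  %lu pages\n",
	       protected_scan_sketch(262144, 131072, 262144, 2));
	/* Usage entirely protected: only the minimum batch remains. */
	printf("fully protected: %lu pages\n",
	       protected_scan_sketch(262144, 262144, 262144, 2));
	return 0;
}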
*/ static bool can_age_anon_pages(struct pglist_data *pgdat, struct scan_control *sc) { /* Aging the anon LRU is valuable if swap is present: */ if (total_swap_pages > 0) return true; /* Also valuable if anon pages can be demoted: */ return can_demote(pgdat->node_id, sc); } #ifdef CONFIG_LRU_GEN #ifdef CONFIG_LRU_GEN_ENABLED DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); #define get_cap(cap) static_branch_likely(&lru_gen_caps[cap]) #else DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); #define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap]) #endif static bool should_walk_mmu(void) { return arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK); } static bool should_clear_pmd_young(void) { return arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG); } /****************************************************************************** * shorthand helpers ******************************************************************************/ #define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) #define DEFINE_MAX_SEQ(lruvec) \ unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) #define DEFINE_MIN_SEQ(lruvec) \ unsigned long min_seq[ANON_AND_FILE] = { \ READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ } #define for_each_gen_type_zone(gen, type, zone) \ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) #define get_memcg_gen(seq) ((seq) % MEMCG_NR_GENS) #define get_memcg_bin(bin) ((bin) % MEMCG_NR_BINS) static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) { struct pglist_data *pgdat = NODE_DATA(nid); #ifdef CONFIG_MEMCG if (memcg) { struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; /* see the comment in mem_cgroup_lruvec() */ if (!lruvec->pgdat) lruvec->pgdat = pgdat; return lruvec; } #endif VM_WARN_ON_ONCE(!mem_cgroup_disabled()); return &pgdat->__lruvec; } static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) { struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); if (!sc->may_swap) return 0; if (!can_demote(pgdat->node_id, sc) && mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) return 0; return sc_swappiness(sc, memcg); } static int get_nr_gens(struct lruvec *lruvec, int type) { return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; } static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) { /* see the comment on lru_gen_folio */ return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; } /****************************************************************************** * Bloom filters ******************************************************************************/ /* * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of * bits in a bitmap, k is the number of hash functions and n is the number of * inserted items. * * Page table walkers use one of the two filters to reduce their search space. * To get rid of non-leaf entries that no longer have enough leaf entries, the * aging uses the double-buffering technique to flip to the other filter each * time it produces a new generation. 
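A quick sanity check of the false-positive rates quoted above, using the standard Bloom filter approximation (1 - e^(-k*n/m))^k rather than anything from the kernel source:

#include <math.h>
#include <stdio.h>

int main(void)
{
	const double m = 1 << 15;	/* BLOOM_FILTER_SHIFT == 15 */
	const double k = 2.0;		/* two hash keys per item */
	const double n[] = { 10000.0, 20000.0 };

	for (int i = 0; i < 2; i++) {
		double p = pow(1.0 - exp(-k * n[i] / m), k);

		/* prints ~0.21 for n=10,000 and ~0.50 for n=20,000 */
		printf("n=%.0f -> false positive rate %.2f\n", n[i], p);
	}
	return 0;
}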
For non-leaf entries that have enough * leaf entries, the aging carries them over to the next generation in * walk_pmd_range(); the eviction also report them when walking the rmap * in lru_gen_look_around(). * * For future optimizations: * 1. It's not necessary to keep both filters all the time. The spare one can be * freed after the RCU grace period and reallocated if needed again. * 2. And when reallocating, it's worth scaling its size according to the number * of inserted entries in the other filter, to reduce the memory overhead on * small systems and false positives on large systems. * 3. Jenkins' hash function is an alternative to Knuth's. */ #define BLOOM_FILTER_SHIFT 15 static inline int filter_gen_from_seq(unsigned long seq) { return seq % NR_BLOOM_FILTERS; } static void get_item_key(void *item, int *key) { u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); key[1] = hash >> BLOOM_FILTER_SHIFT; } static bool test_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq, void *item) { int key[2]; unsigned long *filter; int gen = filter_gen_from_seq(seq); filter = READ_ONCE(mm_state->filters[gen]); if (!filter) return true; get_item_key(item, key); return test_bit(key[0], filter) && test_bit(key[1], filter); } static void update_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq, void *item) { int key[2]; unsigned long *filter; int gen = filter_gen_from_seq(seq); filter = READ_ONCE(mm_state->filters[gen]); if (!filter) return; get_item_key(item, key); if (!test_bit(key[0], filter)) set_bit(key[0], filter); if (!test_bit(key[1], filter)) set_bit(key[1], filter); } static void reset_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq) { unsigned long *filter; int gen = filter_gen_from_seq(seq); filter = mm_state->filters[gen]; if (filter) { bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); return; } filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); WRITE_ONCE(mm_state->filters[gen], filter); } /****************************************************************************** * mm_struct list ******************************************************************************/ #ifdef CONFIG_LRU_GEN_WALKS_MMU static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) { static struct lru_gen_mm_list mm_list = { .fifo = LIST_HEAD_INIT(mm_list.fifo), .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock), }; #ifdef CONFIG_MEMCG if (memcg) return &memcg->mm_list; #endif VM_WARN_ON_ONCE(!mem_cgroup_disabled()); return &mm_list; } static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec) { return &lruvec->mm_state; } static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk) { int key; struct mm_struct *mm; struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec); mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap); if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap)) return NULL; clear_bit(key, &mm->lru_gen.bitmap); return mmget_not_zero(mm) ? 
mm : NULL; } void lru_gen_add_mm(struct mm_struct *mm) { int nid; struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); VM_WARN_ON_ONCE(!list_empty(&mm->lru_gen.list)); #ifdef CONFIG_MEMCG VM_WARN_ON_ONCE(mm->lru_gen.memcg); mm->lru_gen.memcg = memcg; #endif spin_lock(&mm_list->lock); for_each_node_state(nid, N_MEMORY) { struct lruvec *lruvec = get_lruvec(memcg, nid); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* the first addition since the last iteration */ if (mm_state->tail == &mm_list->fifo) mm_state->tail = &mm->lru_gen.list; } list_add_tail(&mm->lru_gen.list, &mm_list->fifo); spin_unlock(&mm_list->lock); } void lru_gen_del_mm(struct mm_struct *mm) { int nid; struct lru_gen_mm_list *mm_list; struct mem_cgroup *memcg = NULL; if (list_empty(&mm->lru_gen.list)) return; #ifdef CONFIG_MEMCG memcg = mm->lru_gen.memcg; #endif mm_list = get_mm_list(memcg); spin_lock(&mm_list->lock); for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* where the current iteration continues after */ if (mm_state->head == &mm->lru_gen.list) mm_state->head = mm_state->head->prev; /* where the last iteration ended before */ if (mm_state->tail == &mm->lru_gen.list) mm_state->tail = mm_state->tail->next; } list_del_init(&mm->lru_gen.list); spin_unlock(&mm_list->lock); #ifdef CONFIG_MEMCG mem_cgroup_put(mm->lru_gen.memcg); mm->lru_gen.memcg = NULL; #endif } #ifdef CONFIG_MEMCG void lru_gen_migrate_mm(struct mm_struct *mm) { struct mem_cgroup *memcg; struct task_struct *task = rcu_dereference_protected(mm->owner, true); VM_WARN_ON_ONCE(task->mm != mm); lockdep_assert_held(&task->alloc_lock); /* for mm_update_next_owner() */ if (mem_cgroup_disabled()) return; /* migration can happen before addition */ if (!mm->lru_gen.memcg) return; rcu_read_lock(); memcg = mem_cgroup_from_task(task); rcu_read_unlock(); if (memcg == mm->lru_gen.memcg) return; VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); lru_gen_del_mm(mm); lru_gen_add_mm(mm); } #endif #else /* !CONFIG_LRU_GEN_WALKS_MMU */ static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) { return NULL; } static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec) { return NULL; } static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk) { return NULL; } #endif static void reset_mm_stats(struct lru_gen_mm_walk *walk, bool last) { int i; int hist; struct lruvec *lruvec = walk->lruvec; struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); hist = lru_hist_from_seq(walk->seq); for (i = 0; i < NR_MM_STATS; i++) { WRITE_ONCE(mm_state->stats[hist][i], mm_state->stats[hist][i] + walk->mm_stats[i]); walk->mm_stats[i] = 0; } if (NR_HIST_GENS > 1 && last) { hist = lru_hist_from_seq(walk->seq + 1); for (i = 0; i < NR_MM_STATS; i++) WRITE_ONCE(mm_state->stats[hist][i], 0); } } static bool iterate_mm_list(struct lru_gen_mm_walk *walk, struct mm_struct **iter) { bool first = false; bool last = false; struct mm_struct *mm = NULL; struct lruvec *lruvec = walk->lruvec; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); /* * mm_state->seq is incremented after each iteration of mm_list. There * are three interesting cases for this page table walker: * 1. It tries to start a new iteration with a stale max_seq: there is * nothing left to do. * 2. 
It started the next iteration: it needs to reset the Bloom filter * so that a fresh set of PTE tables can be recorded. * 3. It ended the current iteration: it needs to reset the mm stats * counters and tell its caller to increment max_seq. */ spin_lock(&mm_list->lock); VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->seq); if (walk->seq <= mm_state->seq) goto done; if (!mm_state->head) mm_state->head = &mm_list->fifo; if (mm_state->head == &mm_list->fifo) first = true; do { mm_state->head = mm_state->head->next; if (mm_state->head == &mm_list->fifo) { WRITE_ONCE(mm_state->seq, mm_state->seq + 1); last = true; break; } /* force scan for those added after the last iteration */ if (!mm_state->tail || mm_state->tail == mm_state->head) { mm_state->tail = mm_state->head->next; walk->force_scan = true; } } while (!(mm = get_next_mm(walk))); done: if (*iter || last) reset_mm_stats(walk, last); spin_unlock(&mm_list->lock); if (mm && first) reset_bloom_filter(mm_state, walk->seq + 1); if (*iter) mmput_async(*iter); *iter = mm; return last; } static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long seq) { bool success = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct lru_gen_mm_list *mm_list = get_mm_list(memcg); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); spin_lock(&mm_list->lock); VM_WARN_ON_ONCE(mm_state->seq + 1 < seq); if (seq > mm_state->seq) { mm_state->head = NULL; mm_state->tail = NULL; WRITE_ONCE(mm_state->seq, mm_state->seq + 1); success = true; } spin_unlock(&mm_list->lock); return success; } /****************************************************************************** * PID controller ******************************************************************************/ /* * A feedback loop based on Proportional-Integral-Derivative (PID) controller. * * The P term is refaulted/(evicted+protected) from a tier in the generation * currently being evicted; the I term is the exponential moving average of the * P term over the generations previously evicted, using the smoothing factor * 1/2; the D term isn't supported. * * The setpoint (SP) is always the first tier of one type; the process variable * (PV) is either any tier of the other type or any other tier of the same * type. * * The error is the difference between the SP and the PV; the correction is to * turn off protection when SP>PV or turn on protection when SP<PV. * * For future optimizations: * 1. The D term may discount the other two terms over time so that long-lived * generations can resist stale information. */ struct ctrl_pos { unsigned long refaulted; unsigned long total; int gain; }; static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain, struct ctrl_pos *pos) { struct lru_gen_folio *lrugen = &lruvec->lrugen; int hist = lru_hist_from_seq(lrugen->min_seq[type]); pos->refaulted = lrugen->avg_refaulted[type][tier] + atomic_long_read(&lrugen->refaulted[hist][type][tier]); pos->total = lrugen->avg_total[type][tier] + atomic_long_read(&lrugen->evicted[hist][type][tier]); if (tier) pos->total += lrugen->protected[hist][type][tier - 1]; pos->gain = gain; } static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) { int hist, tier; struct lru_gen_folio *lrugen = &lruvec->lrugen; bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; unsigned long seq = carryover ? 
lrugen->min_seq[type] : lrugen->max_seq + 1; lockdep_assert_held(&lruvec->lru_lock); if (!carryover && !clear) return; hist = lru_hist_from_seq(seq); for (tier = 0; tier < MAX_NR_TIERS; tier++) { if (carryover) { unsigned long sum; sum = lrugen->avg_refaulted[type][tier] + atomic_long_read(&lrugen->refaulted[hist][type][tier]); WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); sum = lrugen->avg_total[type][tier] + atomic_long_read(&lrugen->evicted[hist][type][tier]); if (tier) sum += lrugen->protected[hist][type][tier - 1]; WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); } if (clear) { atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); atomic_long_set(&lrugen->evicted[hist][type][tier], 0); if (tier) WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); } } } static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) { /* * Return true if the PV has a limited number of refaults or a lower * refaulted/total than the SP. */ return pv->refaulted < MIN_LRU_BATCH || pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= (sp->refaulted + 1) * pv->total * pv->gain; } /****************************************************************************** * the aging ******************************************************************************/ /* promote pages accessed through page tables */ static int folio_update_gen(struct folio *folio, int gen) { unsigned long new_flags, old_flags = READ_ONCE(folio->flags); VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(!rcu_read_lock_held()); do { /* lru_gen_del_folio() has isolated this page? */ if (!(old_flags & LRU_GEN_MASK)) { /* for shrink_folio_list() */ new_flags = old_flags | BIT(PG_referenced); continue; } new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS); new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; } /* protect pages accessed multiple times through file descriptors */ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { int type = folio_is_file_lru(folio); struct lru_gen_folio *lrugen = &lruvec->lrugen; int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); unsigned long new_flags, old_flags = READ_ONCE(folio->flags); VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio); do { new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; /* folio_update_gen() has promoted this page? 
*/ if (new_gen >= 0 && new_gen != old_gen) return new_gen; new_gen = (old_gen + 1) % MAX_NR_GENS; new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS); new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; /* for folio_end_writeback() */ if (reclaiming) new_flags |= BIT(PG_reclaim); } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); lru_gen_update_size(lruvec, folio, old_gen, new_gen); return new_gen; } static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, int old_gen, int new_gen) { int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS); walk->batched++; walk->nr_pages[old_gen][type][zone] -= delta; walk->nr_pages[new_gen][type][zone] += delta; } static void reset_batch_size(struct lru_gen_mm_walk *walk) { int gen, type, zone; struct lruvec *lruvec = walk->lruvec; struct lru_gen_folio *lrugen = &lruvec->lrugen; walk->batched = 0; for_each_gen_type_zone(gen, type, zone) { enum lru_list lru = type * LRU_INACTIVE_FILE; int delta = walk->nr_pages[gen][type][zone]; if (!delta) continue; walk->nr_pages[gen][type][zone] = 0; WRITE_ONCE(lrugen->nr_pages[gen][type][zone], lrugen->nr_pages[gen][type][zone] + delta); if (lru_gen_is_active(lruvec, gen)) lru += LRU_ACTIVE; __update_lru_size(lruvec, lru, zone, delta); } } static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args) { struct address_space *mapping; struct vm_area_struct *vma = args->vma; struct lru_gen_mm_walk *walk = args->private; if (!vma_is_accessible(vma)) return true; if (is_vm_hugetlb_page(vma)) return true; if (!vma_has_recency(vma)) return true; if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) return true; if (vma == get_gate_vma(vma->vm_mm)) return true; if (vma_is_anonymous(vma)) return !walk->can_swap; if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping)) return true; mapping = vma->vm_file->f_mapping; if (mapping_unevictable(mapping)) return true; if (shmem_mapping(mapping)) return !walk->can_swap; /* to exclude special mappings like dax, etc. */ return !mapping->a_ops->read_folio; } /* * Some userspace memory allocators map many single-page VMAs. Instead of * returning back to the PGD table for each of such VMAs, finish an entire PMD * table to reduce zigzags and improve cache performance. 
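The window arithmetic that get_next_vma() below uses to stay inside one page-table's coverage can be illustrated in isolation. A minimal sketch, assuming typical 64-bit 4K-page values for PAGE_SHIFT and PMD_SHIFT and reimplementing round_up() inline:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21	/* 4K pages: one PMD table covers 2MB */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

/*
 * Illustrative restatement of get_next_vma()'s window computation: the next
 * scan window starts at the next size-aligned address after the previous
 * VMA and ends at the end of the enclosing mask-aligned table.
 */
int main(void)
{
	unsigned long vm_end = 0x7f1234567000UL;	/* hypothetical previous *vm_end */
	unsigned long mask = PMD_MASK, size = PAGE_SIZE;

	unsigned long start = (vm_end + size - 1) & ~(size - 1);	/* round_up() */
	unsigned long end = (start | ~mask) + 1;

	/* end - start never exceeds one PMD table's coverage (2MB here) */
	printf("start=%#lx end=%#lx span=%lukB\n", start, end, (end - start) >> 10);
	return 0;
}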
*/ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk *args, unsigned long *vm_start, unsigned long *vm_end) { unsigned long start = round_up(*vm_end, size); unsigned long end = (start | ~mask) + 1; VMA_ITERATOR(vmi, args->mm, start); VM_WARN_ON_ONCE(mask & size); VM_WARN_ON_ONCE((start & mask) != (*vm_start & mask)); for_each_vma(vmi, args->vma) { if (end && end <= args->vma->vm_start) return false; if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) continue; *vm_start = max(start, args->vma->vm_start); *vm_end = min(end - 1, args->vma->vm_end - 1) + 1; return true; } return false; } static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr) { unsigned long pfn = pte_pfn(pte); VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end); if (!pte_present(pte) || is_zero_pfn(pfn)) return -1; if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte))) return -1; if (WARN_ON_ONCE(!pfn_valid(pfn))) return -1; return pfn; } static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) { unsigned long pfn = pmd_pfn(pmd); VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end); if (!pmd_present(pmd) || is_huge_zero_pmd(pmd)) return -1; if (WARN_ON_ONCE(pmd_devmap(pmd))) return -1; if (WARN_ON_ONCE(!pfn_valid(pfn))) return -1; return pfn; } static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, struct pglist_data *pgdat, bool can_swap) { struct folio *folio; /* try to avoid unnecessary memory loads */ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) return NULL; folio = pfn_folio(pfn); if (folio_nid(folio) != pgdat->node_id) return NULL; if (folio_memcg_rcu(folio) != memcg) return NULL; /* file VMAs can contain anon pages from COW */ if (!folio_is_file_lru(folio) && !can_swap) return NULL; return folio; } static bool suitable_to_scan(int total, int young) { int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); /* suitable if the average number of young PTEs per cacheline is >=1 */ return young * n >= total; } static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, struct mm_walk *args) { int i; pte_t *pte; spinlock_t *ptl; unsigned long addr; int total = 0; int young = 0; struct lru_gen_mm_walk *walk = args->private; struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec); struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); DEFINE_MAX_SEQ(walk->lruvec); int old_gen, new_gen = lru_gen_from_seq(max_seq); pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl); if (!pte) return false; if (!spin_trylock(ptl)) { pte_unmap(pte); return false; } arch_enter_lazy_mmu_mode(); restart: for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { unsigned long pfn; struct folio *folio; pte_t ptent = ptep_get(pte + i); total++; walk->mm_stats[MM_LEAF_TOTAL]++; pfn = get_pte_pfn(ptent, args->vma, addr); if (pfn == -1) continue; if (!pte_young(ptent)) { walk->mm_stats[MM_LEAF_OLD]++; continue; } folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); if (!folio) continue; if (!ptep_test_and_clear_young(args->vma, addr, pte + i)) VM_WARN_ON_ONCE(true); young++; walk->mm_stats[MM_LEAF_YOUNG]++; if (pte_dirty(ptent) && !folio_test_dirty(folio) && !(folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio))) folio_mark_dirty(folio); old_gen = folio_update_gen(folio, new_gen); if (old_gen >= 0 && old_gen != new_gen) update_batch_size(walk, folio, old_gen, new_gen); } if (i < 
PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end)) goto restart; arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte, ptl); return suitable_to_scan(total, young); } static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma, struct mm_walk *args, unsigned long *bitmap, unsigned long *first) { int i; pmd_t *pmd; spinlock_t *ptl; struct lru_gen_mm_walk *walk = args->private; struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec); struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); DEFINE_MAX_SEQ(walk->lruvec); int old_gen, new_gen = lru_gen_from_seq(max_seq); VM_WARN_ON_ONCE(pud_leaf(*pud)); /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ if (*first == -1) { *first = addr; bitmap_zero(bitmap, MIN_LRU_BATCH); return; } i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first); if (i && i <= MIN_LRU_BATCH) { __set_bit(i - 1, bitmap); return; } pmd = pmd_offset(pud, *first); ptl = pmd_lockptr(args->mm, pmd); if (!spin_trylock(ptl)) goto done; arch_enter_lazy_mmu_mode(); do { unsigned long pfn; struct folio *folio; /* don't round down the first address */ addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first; pfn = get_pmd_pfn(pmd[i], vma, addr); if (pfn == -1) goto next; if (!pmd_trans_huge(pmd[i])) { if (should_clear_pmd_young()) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; } folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); if (!folio) goto next; if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) goto next; walk->mm_stats[MM_LEAF_YOUNG]++; if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) && !(folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio))) folio_mark_dirty(folio); old_gen = folio_update_gen(folio, new_gen); if (old_gen >= 0 && old_gen != new_gen) update_batch_size(walk, folio, old_gen, new_gen); next: i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1; } while (i <= MIN_LRU_BATCH); arch_leave_lazy_mmu_mode(); spin_unlock(ptl); done: *first = -1; } static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, struct mm_walk *args) { int i; pmd_t *pmd; unsigned long next; unsigned long addr; struct vm_area_struct *vma; DECLARE_BITMAP(bitmap, MIN_LRU_BATCH); unsigned long first = -1; struct lru_gen_mm_walk *walk = args->private; struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec); VM_WARN_ON_ONCE(pud_leaf(*pud)); /* * Finish an entire PMD in two passes: the first only reaches to PTE * tables to avoid taking the PMD lock; the second, if necessary, takes * the PMD lock to clear the accessed bit in PMD entries. 
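The per-cacheline density heuristic in suitable_to_scan(), which walk_pte_range() above uses to decide whether a PMD entry is worth carrying over into the next generation's Bloom filter, works out as follows. The helper below is an illustrative userspace restatement; the cacheline and PTE sizes are typical 64-bit assumptions:

#include <stdbool.h>
#include <stdio.h>

/*
 * With 64-byte cachelines and 8-byte PTEs, n clamps to 8, so a PTE table
 * is "suitable" only if at least 1 in 8 of its scanned PTEs was young.
 */
static bool suitable_to_scan_sketch(int total, int young)
{
	int cache_line = 64, pte_size = 8;
	int n = cache_line / pte_size;

	if (n < 2)
		n = 2;
	else if (n > 8)
		n = 8;

	return young * n >= total;
}

int main(void)
{
	printf("512 PTEs, 63 young: %d\n", suitable_to_scan_sketch(512, 63));	/* 0 */
	printf("512 PTEs, 64 young: %d\n", suitable_to_scan_sketch(512, 64));	/* 1 */
	return 0;
}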
*/ pmd = pmd_offset(pud, start & PUD_MASK); restart: /* walk_pte_range() may call get_next_vma() */ vma = args->vma; for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { pmd_t val = pmdp_get_lockless(pmd + i); next = pmd_addr_end(addr, end); if (!pmd_present(val) || is_huge_zero_pmd(val)) { walk->mm_stats[MM_LEAF_TOTAL]++; continue; } if (pmd_trans_huge(val)) { unsigned long pfn = pmd_pfn(val); struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); walk->mm_stats[MM_LEAF_TOTAL]++; if (!pmd_young(val)) { walk->mm_stats[MM_LEAF_OLD]++; continue; } /* try to avoid unnecessary memory loads */ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); continue; } walk->mm_stats[MM_NONLEAF_TOTAL]++; if (should_clear_pmd_young()) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); } if (!walk->force_scan && !test_bloom_filter(mm_state, walk->seq, pmd + i)) continue; walk->mm_stats[MM_NONLEAF_FOUND]++; if (!walk_pte_range(&val, addr, next, args)) continue; walk->mm_stats[MM_NONLEAF_ADDED]++; /* carry over to the next generation */ update_bloom_filter(mm_state, walk->seq + 1, pmd + i); } walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first); if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end)) goto restart; } static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, struct mm_walk *args) { int i; pud_t *pud; unsigned long addr; unsigned long next; struct lru_gen_mm_walk *walk = args->private; VM_WARN_ON_ONCE(p4d_leaf(*p4d)); pud = pud_offset(p4d, start & P4D_MASK); restart: for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { pud_t val = READ_ONCE(pud[i]); next = pud_addr_end(addr, end); if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) continue; walk_pmd_range(&val, addr, next, args); if (need_resched() || walk->batched >= MAX_LRU_BATCH) { end = (addr | ~PUD_MASK) + 1; goto done; } } if (i < PTRS_PER_PUD && get_next_vma(P4D_MASK, PUD_SIZE, args, &start, &end)) goto restart; end = round_up(end, P4D_SIZE); done: if (!end || !args->vma) return 1; walk->next_addr = max(end, args->vma->vm_start); return -EAGAIN; } static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) { static const struct mm_walk_ops mm_walk_ops = { .test_walk = should_skip_vma, .p4d_entry = walk_pud_range, .walk_lock = PGWALK_RDLOCK, }; int err; struct lruvec *lruvec = walk->lruvec; struct mem_cgroup *memcg = lruvec_memcg(lruvec); walk->next_addr = FIRST_USER_ADDRESS; do { DEFINE_MAX_SEQ(lruvec); err = -EBUSY; /* another thread might have called inc_max_seq() */ if (walk->seq != max_seq) break; /* folio_update_gen() requires stable folio_memcg() */ if (!mem_cgroup_trylock_pages(memcg)) break; /* the caller might be holding the lock for write */ if (mmap_read_trylock(mm)) { err = walk_page_range(mm, walk->next_addr, ULONG_MAX, &mm_walk_ops, walk); mmap_read_unlock(mm); } mem_cgroup_unlock_pages(); if (walk->batched) { spin_lock_irq(&lruvec->lru_lock); reset_batch_size(walk); spin_unlock_irq(&lruvec->lru_lock); } cond_resched(); } while (err == -EAGAIN); } static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc) { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; if (pgdat && current_is_kswapd()) { VM_WARN_ON_ONCE(walk); walk = &pgdat->mm_walk; } else if (!walk && force_alloc) { VM_WARN_ON_ONCE(current_is_kswapd()); walk = kzalloc(sizeof(*walk), __GFP_HIGH | 
__GFP_NOMEMALLOC | __GFP_NOWARN); } current->reclaim_state->mm_walk = walk; return walk; } static void clear_mm_walk(void) { struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; VM_WARN_ON_ONCE(walk && memchr_inv(walk->nr_pages, 0, sizeof(walk->nr_pages))); VM_WARN_ON_ONCE(walk && memchr_inv(walk->mm_stats, 0, sizeof(walk->mm_stats))); current->reclaim_state->mm_walk = NULL; if (!current_is_kswapd()) kfree(walk); } static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) { int zone; int remaining = MAX_LRU_BATCH; struct lru_gen_folio *lrugen = &lruvec->lrugen; int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); if (type == LRU_GEN_ANON && !can_swap) goto done; /* prevent cold/hot inversion if force_scan is true */ for (zone = 0; zone < MAX_NR_ZONES; zone++) { struct list_head *head = &lrugen->folios[old_gen][type][zone]; while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); new_gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[new_gen][type][zone]); if (!--remaining) return false; } } done: reset_ctrl_pos(lruvec, type, true); WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); return true; } static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) { int gen, type, zone; bool success = false; struct lru_gen_folio *lrugen = &lruvec->lrugen; DEFINE_MIN_SEQ(lruvec); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); /* find the oldest populated generation */ for (type = !can_swap; type < ANON_AND_FILE; type++) { while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { gen = lru_gen_from_seq(min_seq[type]); for (zone = 0; zone < MAX_NR_ZONES; zone++) { if (!list_empty(&lrugen->folios[gen][type][zone])) goto next; } min_seq[type]++; } next: ; } /* see the comment on lru_gen_folio */ if (can_swap) { min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); } for (type = !can_swap; type < ANON_AND_FILE; type++) { if (min_seq[type] == lrugen->min_seq[type]) continue; reset_ctrl_pos(lruvec, type, true); WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); success = true; } return success; } static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq, bool can_swap, bool force_scan) { bool success; int prev, next; int type, zone; struct lru_gen_folio *lrugen = &lruvec->lrugen; restart: if (seq < READ_ONCE(lrugen->max_seq)) return false; spin_lock_irq(&lruvec->lru_lock); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); success = seq == lrugen->max_seq; if (!success) goto unlock; for (type = ANON_AND_FILE - 1; type >= 0; type--) { if (get_nr_gens(lruvec, type) != MAX_NR_GENS) continue; VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap)); if (inc_min_seq(lruvec, type, can_swap)) continue; spin_unlock_irq(&lruvec->lru_lock); cond_resched(); goto restart; } /* * Update the active/inactive LRU sizes for compatibility. Both sides of * the current max_seq need to be covered, since max_seq+1 can overlap * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do * overlap, cold/hot inversion happens. 
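 *
 * (For these compat counters the two youngest generations are reported as
 * "active": after the increment, folios in what was max_seq-1 become
 * "inactive", while any folios already sitting in the slot that becomes the
 * new max_seq become "active", hence the prev/next delta below.)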
*/ prev = lru_gen_from_seq(lrugen->max_seq - 1); next = lru_gen_from_seq(lrugen->max_seq + 1); for (type = 0; type < ANON_AND_FILE; type++) { for (zone = 0; zone < MAX_NR_ZONES; zone++) { enum lru_list lru = type * LRU_INACTIVE_FILE; long delta = lrugen->nr_pages[prev][type][zone] - lrugen->nr_pages[next][type][zone]; if (!delta) continue; __update_lru_size(lruvec, lru, zone, delta); __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); } } for (type = 0; type < ANON_AND_FILE; type++) reset_ctrl_pos(lruvec, type, false); WRITE_ONCE(lrugen->timestamps[next], jiffies); /* make sure preceding modifications appear */ smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); unlock: spin_unlock_irq(&lruvec->lru_lock); return success; } static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq, bool can_swap, bool force_scan) { bool success; struct lru_gen_mm_walk *walk; struct mm_struct *mm = NULL; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq)); if (!mm_state) return inc_max_seq(lruvec, seq, can_swap, force_scan); /* see the comment in iterate_mm_list() */ if (seq <= READ_ONCE(mm_state->seq)) return false; /* * If the hardware doesn't automatically set the accessed bit, fallback * to lru_gen_look_around(), which only clears the accessed bit in a * handful of PTEs. Spreading the work out over a period of time usually * is less efficient, but it avoids bursty page faults. */ if (!should_walk_mmu()) { success = iterate_mm_list_nowalk(lruvec, seq); goto done; } walk = set_mm_walk(NULL, true); if (!walk) { success = iterate_mm_list_nowalk(lruvec, seq); goto done; } walk->lruvec = lruvec; walk->seq = seq; walk->can_swap = can_swap; walk->force_scan = force_scan; do { success = iterate_mm_list(walk, &mm); if (mm) walk_mm(mm, walk); } while (mm); done: if (success) { success = inc_max_seq(lruvec, seq, can_swap, force_scan); WARN_ON_ONCE(!success); } return success; } /****************************************************************************** * working set protection ******************************************************************************/ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc) { int priority; unsigned long reclaimable; if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) return; /* * Determine the initial priority based on * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, * where reclaimed_to_scanned_ratio = inactive / total. */ reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); /* round down reclaimable and round up sc->nr_to_reclaim */ priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); /* * The estimation is based on LRU pages only, so cap it to prevent * overshoots of shrinker objects by large margins. 
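 *
 * (Illustrative example: with roughly 2^20 reclaimable inactive pages and
 * nr_to_reclaim = 32, priority = fls_long(1 << 20) - 1 - fls_long(31)
 * = 21 - 1 - 5 = 15, which the clamp below caps at DEF_PRIORITY.)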
*/ sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY); } static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { int gen, type, zone; unsigned long total = 0; bool can_swap = get_swappiness(lruvec, sc); struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec); for (type = !can_swap; type < ANON_AND_FILE; type++) { unsigned long seq; for (seq = min_seq[type]; seq <= max_seq; seq++) { gen = lru_gen_from_seq(seq); for (zone = 0; zone < MAX_NR_ZONES; zone++) total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); } } /* whether the size is big enough to be helpful */ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; } static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) { int gen; unsigned long birth; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MIN_SEQ(lruvec); if (mem_cgroup_below_min(NULL, memcg)) return false; if (!lruvec_is_sizable(lruvec, sc)) return false; /* see the comment on lru_gen_folio */ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); return time_is_before_jiffies(birth + min_ttl); } /* to protect the working set of the last N jiffies */ static unsigned long lru_gen_min_ttl __read_mostly; static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); bool reclaimable = !min_ttl; VM_WARN_ON_ONCE(!current_is_kswapd()); set_initial_priority(pgdat, sc); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); mem_cgroup_calculate_protection(NULL, memcg); if (!reclaimable) reclaimable = lruvec_is_reclaimable(lruvec, sc, min_ttl); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); /* * The main goal is to OOM kill if every generation from all memcgs is * younger than min_ttl. However, another possibility is all memcgs are * either too small or below min. */ if (!reclaimable && mutex_trylock(&oom_lock)) { struct oom_control oc = { .gfp_mask = sc->gfp_mask, }; out_of_memory(&oc); mutex_unlock(&oom_lock); } } /****************************************************************************** * rmap/PT walk feedback ******************************************************************************/ /* * This function exploits spatial locality when shrink_folio_list() walks the * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If * the scan was done cacheline efficiently, it adds the PMD entry pointing to * the PTE table to the Bloom filter. This forms a feedback loop between the * eviction and the aging. 
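 *
 * ("Cacheline efficiently" is judged by suitable_to_scan() at the end of
 * this function: on average at least one young PTE per cacheline worth of
 * PTEs scanned.)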
*/ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { int i; unsigned long start; unsigned long end; struct lru_gen_mm_walk *walk; int young = 0; pte_t *pte = pvmw->pte; unsigned long addr = pvmw->address; struct vm_area_struct *vma = pvmw->vma; struct folio *folio = pfn_folio(pvmw->pfn); bool can_swap = !folio_is_file_lru(folio); struct mem_cgroup *memcg = folio_memcg(folio); struct pglist_data *pgdat = folio_pgdat(folio); struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); DEFINE_MAX_SEQ(lruvec); int old_gen, new_gen = lru_gen_from_seq(max_seq); lockdep_assert_held(pvmw->ptl); VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); if (spin_is_contended(pvmw->ptl)) return; /* exclude special VMAs containing anon pages from COW */ if (vma->vm_flags & VM_SPECIAL) return; /* avoid taking the LRU lock under the PTL when possible */ walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; start = max(addr & PMD_MASK, vma->vm_start); end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1; if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2) end = start + MIN_LRU_BATCH * PAGE_SIZE; else if (end - addr < MIN_LRU_BATCH * PAGE_SIZE / 2) start = end - MIN_LRU_BATCH * PAGE_SIZE; else { start = addr - MIN_LRU_BATCH * PAGE_SIZE / 2; end = addr + MIN_LRU_BATCH * PAGE_SIZE / 2; } } /* folio_update_gen() requires stable folio_memcg() */ if (!mem_cgroup_trylock_pages(memcg)) return; arch_enter_lazy_mmu_mode(); pte -= (addr - start) / PAGE_SIZE; for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { unsigned long pfn; pte_t ptent = ptep_get(pte + i); pfn = get_pte_pfn(ptent, vma, addr); if (pfn == -1) continue; if (!pte_young(ptent)) continue; folio = get_pfn_folio(pfn, memcg, pgdat, can_swap); if (!folio) continue; if (!ptep_test_and_clear_young(vma, addr, pte + i)) VM_WARN_ON_ONCE(true); young++; if (pte_dirty(ptent) && !folio_test_dirty(folio) && !(folio_test_anon(folio) && folio_test_swapbacked(folio) && !folio_test_swapcache(folio))) folio_mark_dirty(folio); if (walk) { old_gen = folio_update_gen(folio, new_gen); if (old_gen >= 0 && old_gen != new_gen) update_batch_size(walk, folio, old_gen, new_gen); continue; } old_gen = folio_lru_gen(folio); if (old_gen < 0) folio_set_referenced(folio); else if (old_gen != new_gen) folio_activate(folio); } arch_leave_lazy_mmu_mode(); mem_cgroup_unlock_pages(); /* feedback from rmap walkers to page table walkers */ if (mm_state && suitable_to_scan(i, young)) update_bloom_filter(mm_state, max_seq, pvmw->pmd); } /****************************************************************************** * memcg LRU ******************************************************************************/ /* see the comment on MEMCG_NR_GENS */ enum { MEMCG_LRU_NOP, MEMCG_LRU_HEAD, MEMCG_LRU_TAIL, MEMCG_LRU_OLD, MEMCG_LRU_YOUNG, }; static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) { int seg; int old, new; unsigned long flags; int bin = get_random_u32_below(MEMCG_NR_BINS); struct pglist_data *pgdat = lruvec_pgdat(lruvec); spin_lock_irqsave(&pgdat->memcg_lru.lock, flags); VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list)); seg = 0; new = old = lruvec->lrugen.gen; /* see the comment on MEMCG_NR_GENS */ if (op == MEMCG_LRU_HEAD) seg = MEMCG_LRU_HEAD; else if (op == MEMCG_LRU_TAIL) seg = MEMCG_LRU_TAIL; else if (op == MEMCG_LRU_OLD) new = get_memcg_gen(pgdat->memcg_lru.seq); else if (op == MEMCG_LRU_YOUNG) new = 
get_memcg_gen(pgdat->memcg_lru.seq + 1); else VM_WARN_ON_ONCE(true); WRITE_ONCE(lruvec->lrugen.seg, seg); WRITE_ONCE(lruvec->lrugen.gen, new); hlist_nulls_del_rcu(&lruvec->lrugen.list); if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); else hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]); pgdat->memcg_lru.nr_memcgs[old]--; pgdat->memcg_lru.nr_memcgs[new]++; if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); spin_unlock_irqrestore(&pgdat->memcg_lru.lock, flags); } #ifdef CONFIG_MEMCG void lru_gen_online_memcg(struct mem_cgroup *memcg) { int gen; int nid; int bin = get_random_u32_below(MEMCG_NR_BINS); for_each_node(nid) { struct pglist_data *pgdat = NODE_DATA(nid); struct lruvec *lruvec = get_lruvec(memcg, nid); spin_lock_irq(&pgdat->memcg_lru.lock); VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list)); gen = get_memcg_gen(pgdat->memcg_lru.seq); lruvec->lrugen.gen = gen; hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); pgdat->memcg_lru.nr_memcgs[gen]++; spin_unlock_irq(&pgdat->memcg_lru.lock); } } void lru_gen_offline_memcg(struct mem_cgroup *memcg) { int nid; for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD); } } void lru_gen_release_memcg(struct mem_cgroup *memcg) { int gen; int nid; for_each_node(nid) { struct pglist_data *pgdat = NODE_DATA(nid); struct lruvec *lruvec = get_lruvec(memcg, nid); spin_lock_irq(&pgdat->memcg_lru.lock); if (hlist_nulls_unhashed(&lruvec->lrugen.list)) goto unlock; gen = lruvec->lrugen.gen; hlist_nulls_del_init_rcu(&lruvec->lrugen.list); pgdat->memcg_lru.nr_memcgs[gen]--; if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq)) WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); unlock: spin_unlock_irq(&pgdat->memcg_lru.lock); } } void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); /* see the comment on MEMCG_NR_GENS */ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_HEAD) lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD); } #endif /* CONFIG_MEMCG */ /****************************************************************************** * the eviction ******************************************************************************/ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc, int tier_idx) { bool success; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); int refs = folio_lru_refs(folio); int tier = lru_tier_from_refs(refs); struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio); /* unevictable */ if (!folio_evictable(folio)) { success = lru_gen_del_folio(lruvec, folio, true); VM_WARN_ON_ONCE_FOLIO(!success, folio); folio_set_unevictable(folio); lruvec_add_folio(lruvec, folio); __count_vm_events(UNEVICTABLE_PGCULLED, delta); return true; } /* promoted */ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { list_move(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } /* protected */ if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { int hist = lru_hist_from_seq(lrugen->min_seq[type]); gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); 
WRITE_ONCE(lrugen->protected[hist][type][tier - 1], lrugen->protected[hist][type][tier - 1] + delta); return true; } /* ineligible */ if (zone > sc->reclaim_idx || skip_cma(folio, sc)) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } /* waiting for writeback */ if (folio_test_locked(folio) || folio_test_writeback(folio) || (type == LRU_GEN_FILE && folio_test_dirty(folio))) { gen = folio_inc_gen(lruvec, folio, true); list_move(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } return false; } static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc) { bool success; /* swap constrained */ if (!(sc->gfp_mask & __GFP_IO) && (folio_test_dirty(folio) || (folio_test_anon(folio) && !folio_test_swapcache(folio)))) return false; /* raced with release_pages() */ if (!folio_try_get(folio)) return false; /* raced with another isolation */ if (!folio_test_clear_lru(folio)) { folio_put(folio); return false; } /* see the comment on MAX_NR_TIERS */ if (!folio_test_referenced(folio)) set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); /* for shrink_folio_list() */ folio_clear_reclaim(folio); folio_clear_referenced(folio); success = lru_gen_del_folio(lruvec, folio, true); VM_WARN_ON_ONCE_FOLIO(!success, folio); return true; } static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, int type, int tier, struct list_head *list) { int i; int gen; enum vm_event_item item; int sorted = 0; int scanned = 0; int isolated = 0; int skipped = 0; int remaining = MAX_LRU_BATCH; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); VM_WARN_ON_ONCE(!list_empty(list)); if (get_nr_gens(lruvec, type) == MIN_NR_GENS) return 0; gen = lru_gen_from_seq(lrugen->min_seq[type]); for (i = MAX_NR_ZONES; i > 0; i--) { LIST_HEAD(moved); int skipped_zone = 0; int zone = (sc->reclaim_idx + i) % MAX_NR_ZONES; struct list_head *head = &lrugen->folios[gen][type][zone]; while (!list_empty(head)) { struct folio *folio = lru_to_folio(head); int delta = folio_nr_pages(folio); VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); scanned += delta; if (sort_folio(lruvec, folio, sc, tier)) sorted += delta; else if (isolate_folio(lruvec, folio, sc)) { list_add(&folio->lru, list); isolated += delta; } else { list_move(&folio->lru, &moved); skipped_zone += delta; } if (!--remaining || max(isolated, skipped_zone) >= MIN_LRU_BATCH) break; } if (skipped_zone) { list_splice(&moved, head); __count_zid_vm_events(PGSCAN_SKIP, zone, skipped_zone); skipped += skipped_zone; } if (!remaining || isolated >= MIN_LRU_BATCH) break; } item = PGSCAN_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) { __count_vm_events(item, isolated); __count_vm_events(PGREFILL, sorted); } __count_memcg_events(memcg, item, isolated); __count_memcg_events(memcg, PGREFILL, sorted); __count_vm_events(PGSCAN_ANON + type, isolated); trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, MAX_LRU_BATCH, scanned, skipped, isolated, type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); /* * There might not be eligible folios due to reclaim_idx. Check the * remaining to prevent livelock if it's not making progress. */ return isolated || !remaining ? 
scanned : 0; } static int get_tier_idx(struct lruvec *lruvec, int type) { int tier; struct ctrl_pos sp, pv; /* * To leave a margin for fluctuations, use a larger gain factor (1:2). * This value is chosen because any other tier would have at least twice * as many refaults as the first tier. */ read_ctrl_pos(lruvec, type, 0, 1, &sp); for (tier = 1; tier < MAX_NR_TIERS; tier++) { read_ctrl_pos(lruvec, type, tier, 2, &pv); if (!positive_ctrl_err(&sp, &pv)) break; } return tier - 1; } static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) { int type, tier; struct ctrl_pos sp, pv; int gain[ANON_AND_FILE] = { swappiness, MAX_SWAPPINESS - swappiness }; /* * Compare the first tier of anon with that of file to determine which * type to scan. Also need to compare other tiers of the selected type * with the first tier of the other type to determine the last tier (of * the selected type) to evict. */ read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); type = positive_ctrl_err(&sp, &pv); read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); for (tier = 1; tier < MAX_NR_TIERS; tier++) { read_ctrl_pos(lruvec, type, tier, gain[type], &pv); if (!positive_ctrl_err(&sp, &pv)) break; } *tier_idx = tier - 1; return type; } static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, int *type_scanned, struct list_head *list) { int i; int type; int scanned; int tier = -1; DEFINE_MIN_SEQ(lruvec); /* * Try to make the obvious choice first, and if anon and file are both * available from the same generation, * 1. Interpret swappiness 1 as file first and MAX_SWAPPINESS as anon * first. * 2. If !__GFP_IO, file first since clean pagecache is more likely to * exist than clean swapcache. */ if (!swappiness) type = LRU_GEN_FILE; else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) type = LRU_GEN_ANON; else if (swappiness == 1) type = LRU_GEN_FILE; else if (swappiness == MAX_SWAPPINESS) type = LRU_GEN_ANON; else if (!(sc->gfp_mask & __GFP_IO)) type = LRU_GEN_FILE; else type = get_type_to_scan(lruvec, swappiness, &tier); for (i = !swappiness; i < ANON_AND_FILE; i++) { if (tier < 0) tier = get_tier_idx(lruvec, type); scanned = scan_folios(lruvec, sc, type, tier, list); if (scanned) break; type = !type; tier = -1; } *type_scanned = type; return scanned; } static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness) { int type; int scanned; int reclaimed; LIST_HEAD(list); LIST_HEAD(clean); struct folio *folio; struct folio *next; enum vm_event_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; bool skip_retry = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); spin_lock_irq(&lruvec->lru_lock); scanned = isolate_folios(lruvec, sc, swappiness, &type, &list); scanned += try_to_inc_min_seq(lruvec, swappiness); if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS) scanned = 0; spin_unlock_irq(&lruvec->lru_lock); if (list_empty(&list)) return scanned; retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); sc->nr_reclaimed += reclaimed; trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id, scanned, reclaimed, &stat, sc->priority, type ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON); list_for_each_entry_safe_reverse(folio, next, &list, lru) { if (!folio_evictable(folio)) { list_del(&folio->lru); folio_putback_lru(folio); continue; } if (folio_test_reclaim(folio) && (folio_test_dirty(folio) || folio_test_writeback(folio))) { /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ if (folio_test_workingset(folio)) folio_set_referenced(folio); continue; } if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) || folio_mapped(folio) || folio_test_locked(folio) || folio_test_dirty(folio) || folio_test_writeback(folio)) { /* don't add rejected folios to the oldest generation */ set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, BIT(PG_active)); continue; } /* retry folios that may have missed folio_rotate_reclaimable() */ list_move(&folio->lru, &clean); } spin_lock_irq(&lruvec->lru_lock); move_folios_to_lru(lruvec, &list); walk = current->reclaim_state->mm_walk; if (walk && walk->batched) { walk->lruvec = lruvec; reset_batch_size(walk); } item = PGSTEAL_KSWAPD + reclaimer_offset(); if (!cgroup_reclaim(sc)) __count_vm_events(item, reclaimed); __count_memcg_events(memcg, item, reclaimed); __count_vm_events(PGSTEAL_ANON + type, reclaimed); spin_unlock_irq(&lruvec->lru_lock); list_splice_init(&clean, &list); if (!list_empty(&list)) { skip_retry = true; goto retry; } return scanned; } static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, bool can_swap, unsigned long *nr_to_scan) { int gen, type, zone; unsigned long old = 0; unsigned long young = 0; unsigned long total = 0; struct lru_gen_folio *lrugen = &lruvec->lrugen; DEFINE_MIN_SEQ(lruvec); /* whether this lruvec is completely out of cold folios */ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) { *nr_to_scan = 0; return true; } for (type = !can_swap; type < ANON_AND_FILE; type++) { unsigned long seq; for (seq = min_seq[type]; seq <= max_seq; seq++) { unsigned long size = 0; gen = lru_gen_from_seq(seq); for (zone = 0; zone < MAX_NR_ZONES; zone++) size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); total += size; if (seq == max_seq) young += size; else if (seq + MIN_NR_GENS == max_seq) old += size; } } *nr_to_scan = total; /* * The aging tries to be lazy to reduce the overhead, while the eviction * stalls when the number of generations reaches MIN_NR_GENS. Hence, the * ideal number of generations is MIN_NR_GENS+1. */ if (min_seq[!can_swap] + MIN_NR_GENS < max_seq) return false; /* * It's also ideal to spread pages out evenly, i.e., 1/(MIN_NR_GENS+1) * of the total number of pages for each generation. A reasonable range * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The * aging cares about the upper bound of hot pages, while the eviction * cares about the lower bound of cold pages. */ if (young * MIN_NR_GENS > total) return true; if (old * (MIN_NR_GENS + 2) < total) return true; return false; } /* * For future optimizations: * 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg * reclaim. 
*/ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap) { bool success; unsigned long nr_to_scan; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) return -1; success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan); /* try to scrape all its memory if this memcg was deleted */ if (nr_to_scan && !mem_cgroup_online(memcg)) return nr_to_scan; /* try to get away with not aging at the default priority */ if (!success || sc->priority == DEF_PRIORITY) return nr_to_scan >> sc->priority; /* stop scanning this lruvec as it's low on cold folios */ return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0; } static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) { int i; enum zone_watermarks mark; /* don't abort memcg reclaim to ensure fairness */ if (!root_reclaim(sc)) return false; if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) return true; /* check the order to exclude compaction-induced reclaim */ if (!current_is_kswapd() || sc->order) return false; mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? WMARK_PROMO : WMARK_HIGH; for (i = 0; i <= sc->reclaim_idx; i++) { struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) return false; } /* kswapd should abort if all eligible zones are safe */ return true; } static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { long nr_to_scan; unsigned long scanned = 0; int swappiness = get_swappiness(lruvec, sc); while (true) { int delta; nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); if (nr_to_scan <= 0) break; delta = evict_folios(lruvec, sc, swappiness); if (!delta) break; scanned += delta; if (scanned >= nr_to_scan) break; if (should_abort_scan(lruvec, sc)) break; cond_resched(); } /* whether this lruvec should be rotated */ return nr_to_scan < 0; } static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) { bool success; unsigned long scanned = sc->nr_scanned; unsigned long reclaimed = sc->nr_reclaimed; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; if (mem_cgroup_below_low(NULL, memcg)) { /* see the comment on MEMCG_NR_GENS */ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL) return MEMCG_LRU_TAIL; memcg_memory_event(memcg, MEMCG_LOW); } success = try_to_shrink_lruvec(lruvec, sc); shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); if (!sc->proactive) vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned, sc->nr_reclaimed - reclaimed); flush_reclaim_state(sc); if (success && mem_cgroup_online(memcg)) return MEMCG_LRU_YOUNG; if (!success && lruvec_is_sizable(lruvec, sc)) return 0; /* one retry if offlined or too small */ return READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL ? 
MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; } static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc) { int op; int gen; int bin; int first_bin; struct lruvec *lruvec; struct lru_gen_folio *lrugen; struct mem_cgroup *memcg; struct hlist_nulls_node *pos; gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); restart: op = 0; memcg = NULL; rcu_read_lock(); hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) { if (op) { lru_gen_rotate_memcg(lruvec, op); op = 0; } mem_cgroup_put(memcg); memcg = NULL; if (gen != READ_ONCE(lrugen->gen)) continue; lruvec = container_of(lrugen, struct lruvec, lrugen); memcg = lruvec_memcg(lruvec); if (!mem_cgroup_tryget(memcg)) { lru_gen_release_memcg(memcg); memcg = NULL; continue; } rcu_read_unlock(); op = shrink_one(lruvec, sc); rcu_read_lock(); if (should_abort_scan(lruvec, sc)) break; } rcu_read_unlock(); if (op) lru_gen_rotate_memcg(lruvec, op); mem_cgroup_put(memcg); if (!is_a_nulls(pos)) return; /* restart if raced with lru_gen_rotate_memcg() */ if (gen != get_nulls_value(pos)) goto restart; /* try the rest of the bins of the current generation */ bin = get_memcg_bin(bin + 1); if (bin != first_bin) goto restart; } static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { struct blk_plug plug; VM_WARN_ON_ONCE(root_reclaim(sc)); VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap); lru_add_drain(); blk_start_plug(&plug); set_mm_walk(NULL, sc->proactive); if (try_to_shrink_lruvec(lruvec, sc)) lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG); clear_mm_walk(); blk_finish_plug(&plug); } static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc) { struct blk_plug plug; unsigned long reclaimed = sc->nr_reclaimed; VM_WARN_ON_ONCE(!root_reclaim(sc)); /* * Unmapped clean folios are already prioritized. Scanning for more of * them is likely futile and can cause high reclaim latency when there * is a large number of memcgs. 
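 *
 * (Hence the walk below is skipped unless both writing back pages and
 * unmapping them are allowed.)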
*/ if (!sc->may_writepage || !sc->may_unmap) goto done; lru_add_drain(); blk_start_plug(&plug); set_mm_walk(pgdat, sc->proactive); set_initial_priority(pgdat, sc); if (current_is_kswapd()) sc->nr_reclaimed = 0; if (mem_cgroup_disabled()) shrink_one(&pgdat->__lruvec, sc); else shrink_many(pgdat, sc); if (current_is_kswapd()) sc->nr_reclaimed += reclaimed; clear_mm_walk(); blk_finish_plug(&plug); done: /* kswapd should never fail */ pgdat->kswapd_failures = 0; } /****************************************************************************** * state change ******************************************************************************/ static bool __maybe_unused state_is_valid(struct lruvec *lruvec) { struct lru_gen_folio *lrugen = &lruvec->lrugen; if (lrugen->enabled) { enum lru_list lru; for_each_evictable_lru(lru) { if (!list_empty(&lruvec->lists[lru])) return false; } } else { int gen, type, zone; for_each_gen_type_zone(gen, type, zone) { if (!list_empty(&lrugen->folios[gen][type][zone])) return false; } } return true; } static bool fill_evictable(struct lruvec *lruvec) { enum lru_list lru; int remaining = MAX_LRU_BATCH; for_each_evictable_lru(lru) { int type = is_file_lru(lru); bool active = is_active_lru(lru); struct list_head *head = &lruvec->lists[lru]; while (!list_empty(head)) { bool success; struct folio *folio = lru_to_folio(head); VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio) != active, folio); VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); VM_WARN_ON_ONCE_FOLIO(folio_lru_gen(folio) != -1, folio); lruvec_del_folio(lruvec, folio); success = lru_gen_add_folio(lruvec, folio, false); VM_WARN_ON_ONCE(!success); if (!--remaining) return false; } } return true; } static bool drain_evictable(struct lruvec *lruvec) { int gen, type, zone; int remaining = MAX_LRU_BATCH; for_each_gen_type_zone(gen, type, zone) { struct list_head *head = &lruvec->lrugen.folios[gen][type][zone]; while (!list_empty(head)) { bool success; struct folio *folio = lru_to_folio(head); VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); success = lru_gen_del_folio(lruvec, folio, false); VM_WARN_ON_ONCE(!success); lruvec_add_folio(lruvec, folio); if (!--remaining) return false; } } return true; } static void lru_gen_change_state(bool enabled) { static DEFINE_MUTEX(state_mutex); struct mem_cgroup *memcg; cgroup_lock(); cpus_read_lock(); get_online_mems(); mutex_lock(&state_mutex); if (enabled == lru_gen_enabled()) goto unlock; if (enabled) static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); else static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { int nid; for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); spin_lock_irq(&lruvec->lru_lock); VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); VM_WARN_ON_ONCE(!state_is_valid(lruvec)); lruvec->lrugen.enabled = enabled; while (!(enabled ? 
fill_evictable(lruvec) : drain_evictable(lruvec))) { spin_unlock_irq(&lruvec->lru_lock); cond_resched(); spin_lock_irq(&lruvec->lru_lock); } spin_unlock_irq(&lruvec->lru_lock); } cond_resched(); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); unlock: mutex_unlock(&state_mutex); put_online_mems(); cpus_read_unlock(); cgroup_unlock(); } /****************************************************************************** * sysfs interface ******************************************************************************/ static ssize_t min_ttl_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ static ssize_t min_ttl_ms_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { unsigned int msecs; if (kstrtouint(buf, 0, &msecs)) return -EINVAL; WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); return len; } static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR_RW(min_ttl_ms); static ssize_t enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { unsigned int caps = 0; if (get_cap(LRU_GEN_CORE)) caps |= BIT(LRU_GEN_CORE); if (should_walk_mmu()) caps |= BIT(LRU_GEN_MM_WALK); if (should_clear_pmd_young()) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return sysfs_emit(buf, "0x%04x\n", caps); } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { int i; unsigned int caps; if (tolower(*buf) == 'n') caps = 0; else if (tolower(*buf) == 'y') caps = -1; else if (kstrtouint(buf, 0, &caps)) return -EINVAL; for (i = 0; i < NR_LRU_GEN_CAPS; i++) { bool enabled = caps & BIT(i); if (i == LRU_GEN_CORE) lru_gen_change_state(enabled); else if (enabled) static_branch_enable(&lru_gen_caps[i]); else static_branch_disable(&lru_gen_caps[i]); } return len; } static struct kobj_attribute lru_gen_enabled_attr = __ATTR_RW(enabled); static struct attribute *lru_gen_attrs[] = { &lru_gen_min_ttl_attr.attr, &lru_gen_enabled_attr.attr, NULL }; static const struct attribute_group lru_gen_attr_group = { .name = "lru_gen", .attrs = lru_gen_attrs, }; /****************************************************************************** * debugfs interface ******************************************************************************/ static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) { struct mem_cgroup *memcg; loff_t nr_to_skip = *pos; m->private = kvmalloc(PATH_MAX, GFP_KERNEL); if (!m->private) return ERR_PTR(-ENOMEM); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { int nid; for_each_node_state(nid, N_MEMORY) { if (!nr_to_skip--) return get_lruvec(memcg, nid); } } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); return NULL; } static void lru_gen_seq_stop(struct seq_file *m, void *v) { if (!IS_ERR_OR_NULL(v)) mem_cgroup_iter_break(NULL, lruvec_memcg(v)); kvfree(m->private); m->private = NULL; } static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) { int nid = lruvec_pgdat(v)->node_id; struct mem_cgroup *memcg = lruvec_memcg(v); ++*pos; nid = next_memory_node(nid); if (nid == MAX_NUMNODES) { memcg = mem_cgroup_iter(NULL, memcg, NULL); if (!memcg) return NULL; nid = first_memory_node; } return get_lruvec(memcg, nid); } static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, unsigned long seq) { 
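	/*
	 * Print the per-tier refault statistics for one generation, followed
	 * by the mm walk stats; see
	 * Documentation/admin-guide/mm/multigen_lru.rst for the field layout.
	 */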
int i; int type, tier; int hist = lru_hist_from_seq(seq); struct lru_gen_folio *lrugen = &lruvec->lrugen; struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); for (tier = 0; tier < MAX_NR_TIERS; tier++) { seq_printf(m, " %10d", tier); for (type = 0; type < ANON_AND_FILE; type++) { const char *s = " "; unsigned long n[3] = {}; if (seq == max_seq) { s = "RT "; n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); n[1] = READ_ONCE(lrugen->avg_total[type][tier]); } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { s = "rep"; n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); if (tier) n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); } for (i = 0; i < 3; i++) seq_printf(m, " %10lu%c", n[i], s[i]); } seq_putc(m, '\n'); } if (!mm_state) return; seq_puts(m, " "); for (i = 0; i < NR_MM_STATS; i++) { const char *s = " "; unsigned long n = 0; if (seq == max_seq && NR_HIST_GENS == 1) { s = "LOYNFA"; n = READ_ONCE(mm_state->stats[hist][i]); } else if (seq != max_seq && NR_HIST_GENS > 1) { s = "loynfa"; n = READ_ONCE(mm_state->stats[hist][i]); } seq_printf(m, " %10lu%c", n, s[i]); } seq_putc(m, '\n'); } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ static int lru_gen_seq_show(struct seq_file *m, void *v) { unsigned long seq; bool full = !debugfs_real_fops(m->file)->write; struct lruvec *lruvec = v; struct lru_gen_folio *lrugen = &lruvec->lrugen; int nid = lruvec_pgdat(lruvec)->node_id; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec); if (nid == first_memory_node) { const char *path = memcg ? m->private : ""; #ifdef CONFIG_MEMCG if (memcg) cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); #endif seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); } seq_printf(m, " node %5d\n", nid); if (!full) seq = min_seq[LRU_GEN_ANON]; else if (max_seq >= MAX_NR_GENS) seq = max_seq - MAX_NR_GENS + 1; else seq = 0; for (; seq <= max_seq; seq++) { int type, zone; int gen = lru_gen_from_seq(seq); unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth)); for (type = 0; type < ANON_AND_FILE; type++) { unsigned long size = 0; char mark = full && seq < min_seq[type] ? 
'x' : ' '; for (zone = 0; zone < MAX_NR_ZONES; zone++) size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); seq_printf(m, " %10lu%c", size, mark); } seq_putc(m, '\n'); if (full) lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); } return 0; } static const struct seq_operations lru_gen_seq_ops = { .start = lru_gen_seq_start, .stop = lru_gen_seq_stop, .next = lru_gen_seq_next, .show = lru_gen_seq_show, }; static int run_aging(struct lruvec *lruvec, unsigned long seq, bool can_swap, bool force_scan) { DEFINE_MAX_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec); if (seq < max_seq) return 0; if (seq > max_seq) return -EINVAL; if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq) return -ERANGE; try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan); return 0; } static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, int swappiness, unsigned long nr_to_reclaim) { DEFINE_MAX_SEQ(lruvec); if (seq + MIN_NR_GENS > max_seq) return -EINVAL; sc->nr_reclaimed = 0; while (!signal_pending(current)) { DEFINE_MIN_SEQ(lruvec); if (seq < min_seq[!swappiness]) return 0; if (sc->nr_reclaimed >= nr_to_reclaim) return 0; if (!evict_folios(lruvec, sc, swappiness)) return 0; cond_resched(); } return -EINTR; } static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, struct scan_control *sc, int swappiness, unsigned long opt) { struct lruvec *lruvec; int err = -EINVAL; struct mem_cgroup *memcg = NULL; if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) return -EINVAL; if (!mem_cgroup_disabled()) { rcu_read_lock(); memcg = mem_cgroup_from_id(memcg_id); if (!mem_cgroup_tryget(memcg)) memcg = NULL; rcu_read_unlock(); if (!memcg) return -EINVAL; } if (memcg_id != mem_cgroup_id(memcg)) goto done; lruvec = get_lruvec(memcg, nid); if (swappiness < MIN_SWAPPINESS) swappiness = get_swappiness(lruvec, sc); else if (swappiness > MAX_SWAPPINESS) goto done; switch (cmd) { case '+': err = run_aging(lruvec, seq, swappiness, opt); break; case '-': err = run_eviction(lruvec, seq, sc, swappiness, opt); break; } done: mem_cgroup_put(memcg); return err; } /* see Documentation/admin-guide/mm/multigen_lru.rst for details */ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, size_t len, loff_t *pos) { void *buf; char *cur, *next; unsigned int flags; struct blk_plug plug; int err = -EINVAL; struct scan_control sc = { .may_writepage = true, .may_unmap = true, .may_swap = true, .reclaim_idx = MAX_NR_ZONES - 1, .gfp_mask = GFP_KERNEL, }; buf = kvmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; if (copy_from_user(buf, src, len)) { kvfree(buf); return -EFAULT; } set_task_reclaim_state(current, &sc.reclaim_state); flags = memalloc_noreclaim_save(); blk_start_plug(&plug); if (!set_mm_walk(NULL, true)) { err = -ENOMEM; goto done; } next = buf; next[len] = '\0'; while ((cur = strsep(&next, ",;\n"))) { int n; int end; char cmd; unsigned int memcg_id; unsigned int nid; unsigned long seq; unsigned int swappiness = -1; unsigned long opt = -1; cur = skip_spaces(cur); if (!*cur) continue; n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, &seq, &end, &swappiness, &end, &opt, &end); if (n < 4 || cur[end]) { err = -EINVAL; break; } err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); if (err) break; } done: clear_mm_walk(); blk_finish_plug(&plug); memalloc_noreclaim_restore(flags); set_task_reclaim_state(current, NULL); kvfree(buf); return err ? 
: len; } static int lru_gen_seq_open(struct inode *inode, struct file *file) { return seq_open(file, &lru_gen_seq_ops); } static const struct file_operations lru_gen_rw_fops = { .open = lru_gen_seq_open, .read = seq_read, .write = lru_gen_seq_write, .llseek = seq_lseek, .release = seq_release, }; static const struct file_operations lru_gen_ro_fops = { .open = lru_gen_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; /****************************************************************************** * initialization ******************************************************************************/ void lru_gen_init_pgdat(struct pglist_data *pgdat) { int i, j; spin_lock_init(&pgdat->memcg_lru.lock); for (i = 0; i < MEMCG_NR_GENS; i++) { for (j = 0; j < MEMCG_NR_BINS; j++) INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i); } } void lru_gen_init_lruvec(struct lruvec *lruvec) { int i; int gen, type, zone; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); lrugen->max_seq = MIN_NR_GENS + 1; lrugen->enabled = lru_gen_enabled(); for (i = 0; i <= MIN_NR_GENS + 1; i++) lrugen->timestamps[i] = jiffies; for_each_gen_type_zone(gen, type, zone) INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]); if (mm_state) mm_state->seq = MIN_NR_GENS; } #ifdef CONFIG_MEMCG void lru_gen_init_memcg(struct mem_cgroup *memcg) { struct lru_gen_mm_list *mm_list = get_mm_list(memcg); if (!mm_list) return; INIT_LIST_HEAD(&mm_list->fifo); spin_lock_init(&mm_list->lock); } void lru_gen_exit_memcg(struct mem_cgroup *memcg) { int i; int nid; struct lru_gen_mm_list *mm_list = get_mm_list(memcg); VM_WARN_ON_ONCE(mm_list && !list_empty(&mm_list->fifo)); for_each_node(nid) { struct lruvec *lruvec = get_lruvec(memcg, nid); struct lru_gen_mm_state *mm_state = get_mm_state(lruvec); VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, sizeof(lruvec->lrugen.nr_pages))); lruvec->lrugen.list.next = LIST_POISON1; if (!mm_state) continue; for (i = 0; i < NR_BLOOM_FILTERS; i++) { bitmap_free(mm_state->filters[i]); mm_state->filters[i] = NULL; } } } #endif /* CONFIG_MEMCG */ static int __init init_lru_gen(void) { BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) pr_err("lru_gen: failed to create sysfs group\n"); debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); return 0; }; late_initcall(init_lru_gen); #else /* !CONFIG_LRU_GEN */ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { BUILD_BUG(); } static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { BUILD_BUG(); } static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc) { BUILD_BUG(); } #endif /* CONFIG_LRU_GEN */ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long targets[NR_LRU_LISTS]; unsigned long nr_to_scan; enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; bool proportional_reclaim; struct blk_plug plug; if (lru_gen_enabled() && !root_reclaim(sc)) { lru_gen_shrink_lruvec(lruvec, sc); return; } get_scan_count(lruvec, sc, nr); /* Record the original scan target for proportional adjustments later */ memcpy(targets, nr, sizeof(nr)); /* * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal * event that can 
occur when there is little memory pressure e.g. * multiple streaming readers/writers. Hence, we do not abort scanning * when the requested number of pages are reclaimed when scanning at * DEF_PRIORITY on the assumption that the fact we are direct * reclaiming implies that kswapd is not keeping up and it is best to * do a batch of work at once. For memcg reclaim one check is made to * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. */ proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { unsigned long nr_anon, nr_file, percentage; unsigned long nr_scanned; for_each_evictable_lru(lru) { if (nr[lru]) { nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, lruvec, sc); } } cond_resched(); if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* * For kswapd and memcg, reclaim at least the number of pages * requested. Ensure that the anon and file LRUs are scanned * proportionally what was requested by get_scan_count(). We * stop reclaiming one LRU and reduce the amount scanning * proportional to the original scan target. */ nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; /* * It's just vindictive to attack the larger once the smaller * has gone to zero. And given the way we stop scanning the * smaller below, this makes sure that we only make one nudge * towards proportionality once we've got nr_to_reclaim. */ if (!nr_file || !nr_anon) break; if (nr_file > nr_anon) { unsigned long scan_target = targets[LRU_INACTIVE_ANON] + targets[LRU_ACTIVE_ANON] + 1; lru = LRU_BASE; percentage = nr_anon * 100 / scan_target; } else { unsigned long scan_target = targets[LRU_INACTIVE_FILE] + targets[LRU_ACTIVE_FILE] + 1; lru = LRU_FILE; percentage = nr_file * 100 / scan_target; } /* Stop scanning the smaller of the LRU */ nr[lru] = 0; nr[lru + LRU_ACTIVE] = 0; /* * Recalculate the other LRU scan count based on its original * scan target and the percentage scanning already complete */ lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); lru += LRU_ACTIVE; nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; /* * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ if (can_age_anon_pages(lruvec_pgdat(lruvec), sc) && inactive_is_low(lruvec, LRU_INACTIVE_ANON)) shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); } /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; return false; } /* * Reclaim/compaction is used for high-order allocation requests. It reclaims * order-0 pages before compacting the zone. should_continue_reclaim() returns * true if more pages should be reclaimed such that when the page allocator * calls try_to_compact_pages() that it will have enough free pages to succeed. 
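 * (The buffer aimed for is compact_gap(sc->order) pages, which the code
 * below compares against the remaining inactive LRU pages.)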
* It will give up earlier than that if there is difficulty reclaiming pages. */ static inline bool should_continue_reclaim(struct pglist_data *pgdat, unsigned long nr_reclaimed, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; int z; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) return false; /* * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX * number of pages that were scanned. This will return to the caller * with the risk reclaim/compaction and the resulting allocation attempt * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL * allocations through requiring that the full LRU list has been scanned * first, by assuming that zero delta of sc->nr_scanned means full LRU * scan, but that approximation was wrong, and there were corner cases * where always a non-zero amount of pages were scanned. */ if (!nr_reclaimed) return false; /* If compaction would go ahead or the allocation would succeed, stop */ for (z = 0; z <= sc->reclaim_idx; z++) { struct zone *zone = &pgdat->node_zones[z]; if (!managed_zone(zone)) continue; /* Allocation can already succeed, nothing to do */ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone), sc->reclaim_idx, 0)) return false; if (compaction_suitable(zone, sc->order, sc->reclaim_idx)) return false; } /* * If we have not reclaimed enough pages for compaction and the * inactive lists are large enough, continue reclaiming */ pages_for_compaction = compact_gap(sc->order); inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE); if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON); return inactive_lru_pages > pages_for_compaction; } static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) { struct mem_cgroup *target_memcg = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .pgdat = pgdat, }; struct mem_cgroup_reclaim_cookie *partial = &reclaim; struct mem_cgroup *memcg; /* * In most cases, direct reclaimers can do partial walks * through the cgroup tree, using an iterator state that * persists across invocations. This strikes a balance between * fairness and allocation latency. * * For kswapd, reliable forward progress is more important * than a quick return to idle. Always do full walks. */ if (current_is_kswapd() || sc->memcg_full_walk) partial = NULL; memcg = mem_cgroup_iter(target_memcg, NULL, partial); do { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); unsigned long reclaimed; unsigned long scanned; /* * This loop can become CPU-bound when target memcgs * aren't eligible for reclaim - either because they * don't have any reclaimable pages, or because their * memory is explicitly protected. Avoid soft lockups. */ cond_resched(); mem_cgroup_calculate_protection(target_memcg, memcg); if (mem_cgroup_below_min(target_memcg, memcg)) { /* * Hard protection. * If there is no reclaimable memory, OOM. */ continue; } else if (mem_cgroup_below_low(target_memcg, memcg)) { /* * Soft protection. * Respect the protection only as long as * there is an unprotected supply * of reclaimable memory from other cgroups. 
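 *
 * (sc->memcg_low_skipped, set below, tells the caller that protected
 * memory was left untouched so it may retry with sc->memcg_low_reclaim
 * set.)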
*/ if (!sc->memcg_low_reclaim) { sc->memcg_low_skipped = 1; continue; } memcg_memory_event(memcg, MEMCG_LOW); } reclaimed = sc->nr_reclaimed; scanned = sc->nr_scanned; shrink_lruvec(lruvec, sc); shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority); /* Record the group's reclaim efficiency */ if (!sc->proactive) vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned, sc->nr_reclaimed - reclaimed); /* If partial walks are allowed, bail once goal is reached */ if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) { mem_cgroup_iter_break(target_memcg, memcg); break; } } while ((memcg = mem_cgroup_iter(target_memcg, memcg, partial))); } static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) { unsigned long nr_reclaimed, nr_scanned, nr_node_reclaimed; struct lruvec *target_lruvec; bool reclaimable = false; if (lru_gen_enabled() && root_reclaim(sc)) { lru_gen_shrink_node(pgdat, sc); return; } target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); again: memset(&sc->nr, 0, sizeof(sc->nr)); nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; prepare_scan_control(pgdat, sc); shrink_node_memcgs(pgdat, sc); flush_reclaim_state(sc); nr_node_reclaimed = sc->nr_reclaimed - nr_reclaimed; /* Record the subtree's reclaim efficiency */ if (!sc->proactive) vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true, sc->nr_scanned - nr_scanned, nr_node_reclaimed); if (nr_node_reclaimed) reclaimable = true; if (current_is_kswapd()) { /* * If reclaim is isolating dirty pages under writeback, * it implies that the long-lived page allocation rate * is exceeding the page laundering rate. Either the * global limits are not being effective at throttling * processes due to the page distribution throughout * zones or there is heavy usage of a slow backing * device. The only option is to throttle from reclaim * context which is not ideal as there is no guarantee * the dirtying process is throttled in the same way * balance_dirty_pages() manages. * * Once a node is flagged PGDAT_WRITEBACK, kswapd will * count the number of pages under pages flagged for * immediate reclaim and stall if any are encountered * in the nr_immediate check below. */ if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken) set_bit(PGDAT_WRITEBACK, &pgdat->flags); /* Allow kswapd to start writing pages during reclaim.*/ if (sc->nr.unqueued_dirty == sc->nr.file_taken) set_bit(PGDAT_DIRTY, &pgdat->flags); /* * If kswapd scans pages marked for immediate * reclaim and under writeback (nr_immediate), it * implies that pages are cycling through the LRU * faster than they are written so forcibly stall * until some pages complete writeback. */ if (sc->nr.immediate) reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); } /* * Tag a node/memcg as congested if all the dirty pages were marked * for writeback and immediate reclaim (counted in nr.congested). * * Legacy memcg will stall in page writeback so avoid forcibly * stalling in reclaim_throttle(). */ if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested) { if (cgroup_reclaim(sc) && writeback_throttling_sane(sc)) set_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags); if (current_is_kswapd()) set_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags); } /* * Stall direct reclaim for IO completions if the lruvec is * node is congested. Allow kswapd to continue until it * starts encountering unqueued dirty pages or cycling through * the LRU too quickly. 
*/ if (!current_is_kswapd() && current_may_throttle() && !sc->hibernation_mode && (test_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags) || test_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags))) reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED); if (should_continue_reclaim(pgdat, nr_node_reclaimed, sc)) goto again; /* * Kswapd gives up on balancing particular nodes after too * many failures to reclaim anything from them and goes to * sleep. On reclaim progress, reset the failure counter. A * successful direct reclaim run will revive a dormant kswapd. */ if (reclaimable) pgdat->kswapd_failures = 0; else if (sc->cache_trim_mode) sc->cache_trim_mode_failed = 1; } /* * Returns true if compaction should go ahead for a costly-order request, or * the allocation would already succeed without compaction. Return false if we * should reclaim first. */ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) { unsigned long watermark; if (!gfp_compaction_allowed(sc->gfp_mask)) return false; /* Allocation can already succeed, nothing to do */ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone), sc->reclaim_idx, 0)) return true; /* Compaction cannot yet proceed. Do reclaim. */ if (!compaction_suitable(zone, sc->order, sc->reclaim_idx)) return false; /* * Compaction is already possible, but it takes time to run and there * are potentially other callers using the pages just freed. So proceed * with reclaim to make a buffer of free pages available to give * compaction a reasonable chance of completing and allocating the page. * Note that we won't actually reclaim the whole buffer in one attempt * as the target watermark in should_continue_reclaim() is lower. But if * we are already above the high+gap watermark, don't reclaim at all. */ watermark = high_wmark_pages(zone) + compact_gap(sc->order); return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx); } static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) { /* * If reclaim is making progress greater than 12% efficiency then * wake all the NOPROGRESS throttled tasks. */ if (sc->nr_reclaimed > (sc->nr_scanned >> 3)) { wait_queue_head_t *wqh; wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS]; if (waitqueue_active(wqh)) wake_up(wqh); return; } /* * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages * under writeback and marked for immediate reclaim at the tail of the * LRU. */ if (current_is_kswapd() || cgroup_reclaim(sc)) return; /* Throttle if making no progress at high priorities. */ if (sc->priority == 1 && !sc->nr_reclaimed) reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS); } /* * This is the direct reclaim path, for page-allocating processes. We only * try to reclaim pages from zones which will satisfy the caller's allocation * request. * * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it.
*/ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; gfp_t orig_mask; pg_data_t *last_pgdat = NULL; pg_data_t *first_pgdat = NULL; /* * If the number of buffer_heads in the machine exceeds the maximum * allowed level, force direct reclaim to scan the highmem zone as * highmem pages could be pinning lowmem pages storing buffer_heads */ orig_mask = sc->gfp_mask; if (buffer_heads_over_limit) { sc->gfp_mask |= __GFP_HIGHMEM; sc->reclaim_idx = gfp_zone(sc->gfp_mask); } for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, sc->nodemask) { /* * Take care memory controller reclaiming has small influence * to global LRU. */ if (!cgroup_reclaim(sc)) { if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL)) continue; /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. * Even though compaction is invoked for any * non-zero order, only frequent costly order * reclamation is disruptive enough to become a * noticeable problem, like transparent huge * page allocations. */ if (IS_ENABLED(CONFIG_COMPACTION) && sc->order > PAGE_ALLOC_COSTLY_ORDER && compaction_ready(zone, sc)) { sc->compaction_ready = true; continue; } /* * Shrink each node in the zonelist once. If the * zonelist is ordered by zone (not the default) then a * node may be shrunk multiple times but in that case * the user prefers lower zones being preserved. */ if (zone->zone_pgdat == last_pgdat) continue; /* * This steals pages from memory cgroups over softlimit * and returns the number of reclaimed pages and * scanned pages. This works for global memory pressure * and balancing, not for a memcg's limit. */ nr_soft_scanned = 0; nr_soft_reclaimed = memcg1_soft_limit_reclaim(zone->zone_pgdat, sc->order, sc->gfp_mask, &nr_soft_scanned); sc->nr_reclaimed += nr_soft_reclaimed; sc->nr_scanned += nr_soft_scanned; /* need some check for avoid more shrink_zone() */ } if (!first_pgdat) first_pgdat = zone->zone_pgdat; /* See comment about same check for global reclaim above */ if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; shrink_node(zone->zone_pgdat, sc); } if (first_pgdat) consider_reclaim_throttle(first_pgdat, sc); /* * Restore to original mask to avoid the impact on the caller if we * promoted it to __GFP_HIGHMEM. */ sc->gfp_mask = orig_mask; } static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) { struct lruvec *target_lruvec; unsigned long refaults; if (lru_gen_enabled()) return; target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[WORKINGSET_ANON] = refaults; refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_FILE); target_lruvec->refaults[WORKINGSET_FILE] = refaults; } /* * This is the main entry point to direct page reclaim. * * If a full scan of the inactive list fails to free enough memory then we * are "out of memory" and something needs to be killed. * * If the caller is !__GFP_FS then the probability of a failure is reasonably * high - the zone may be full of dirty or under-writeback pages, which this * caller can't do much about. We kick the writeback threads and take explicit * naps in the hope that some of these pages can be written. But if the * allocating task holds filesystem locks which prevent writeout this might not * work, and the allocation attempt will fail. 
* * returns: 0, if no pages reclaimed * else, the number of pages reclaimed */ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int initial_priority = sc->priority; pg_data_t *last_pgdat; struct zoneref *z; struct zone *zone; retry: delayacct_freepages_start(); if (!cgroup_reclaim(sc)) __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); do { if (!sc->proactive) vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, sc->priority); sc->nr_scanned = 0; shrink_zones(zonelist, sc); if (sc->nr_reclaimed >= sc->nr_to_reclaim) break; if (sc->compaction_ready) break; /* * If we're getting trouble reclaiming, start doing * writepage even in laptop mode. */ if (sc->priority < DEF_PRIORITY - 2) sc->may_writepage = 1; } while (--sc->priority >= 0); last_pgdat = NULL; for_each_zone_zonelist_nodemask(zone, z, zonelist, sc->reclaim_idx, sc->nodemask) { if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat); if (cgroup_reclaim(sc)) { struct lruvec *lruvec; lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, zone->zone_pgdat); clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags); } } delayacct_freepages_end(); if (sc->nr_reclaimed) return sc->nr_reclaimed; /* Aborted reclaim to try compaction? don't OOM, then */ if (sc->compaction_ready) return 1; /* * In most cases, direct reclaimers can do partial walks * through the cgroup tree to meet the reclaim goal while * keeping latency low. Since the iterator state is shared * among all direct reclaim invocations (to retain fairness * among cgroups), though, high concurrency can result in * individual threads not seeing enough cgroups to make * meaningful forward progress. Avoid false OOMs in this case. */ if (!sc->memcg_full_walk) { sc->priority = initial_priority; sc->memcg_full_walk = 1; goto retry; } /* * We make inactive:active ratio decisions based on the node's * composition of memory, but a restrictive reclaim_idx or a * memory.low cgroup setting can exempt large amounts of * memory from reclaim. Neither of which are very common, so * instead of doing costly eligibility calculations of the * entire cgroup subtree up front, we assume the estimates are * good, and retry with forcible deactivation if that fails. */ if (sc->skipped_deactivate) { sc->priority = initial_priority; sc->force_deactivate = 1; sc->skipped_deactivate = 0; goto retry; } /* Untapped cgroup reserves? Don't OOM, retry. 
*/ if (sc->memcg_low_skipped) { sc->priority = initial_priority; sc->force_deactivate = 0; sc->memcg_low_reclaim = 1; sc->memcg_low_skipped = 0; goto retry; } return 0; } static bool allow_direct_reclaim(pg_data_t *pgdat) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; unsigned long free_pages = 0; int i; bool wmark_ok; if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) return true; for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; if (!managed_zone(zone)) continue; if (!zone_reclaimable_pages(zone)) continue; pfmemalloc_reserve += min_wmark_pages(zone); free_pages += zone_page_state_snapshot(zone, NR_FREE_PAGES); } /* If there are no reserves (unexpected config) then do not throttle */ if (!pfmemalloc_reserve) return true; wmark_ok = free_pages > pfmemalloc_reserve / 2; /* kswapd must be awake if processes are being throttled */ if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) WRITE_ONCE(pgdat->kswapd_highest_zoneidx, ZONE_NORMAL); wake_up_interruptible(&pgdat->kswapd_wait); } return wmark_ok; } /* * Throttle direct reclaimers if backing storage is backed by the network * and the PFMEMALLOC reserve for the preferred node is getting dangerously * depleted. kswapd will continue to make progress and wake the processes * when the low watermark is reached. * * Returns true if a fatal signal was delivered during throttling. If this * happens, the page allocator should not consider triggering the OOM killer. */ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, nodemask_t *nodemask) { struct zoneref *z; struct zone *zone; pg_data_t *pgdat = NULL; /* * Kernel threads should not be throttled as they may be indirectly * responsible for cleaning pages necessary for reclaim to make forward * progress. kjournald for example may enter direct reclaim while * committing a transaction where throttling it could forcing other * processes to block on log_wait_commit(). */ if (current->flags & PF_KTHREAD) goto out; /* * If a fatal signal is pending, this process should not throttle. * It should return quickly so it can exit and free its memory */ if (fatal_signal_pending(current)) goto out; /* * Check if the pfmemalloc reserves are ok by finding the first node * with a usable ZONE_NORMAL or lower zone. The expectation is that * GFP_KERNEL will be required for allocating network buffers when * swapping over the network so ZONE_HIGHMEM is unusable. * * Throttling is based on the first usable node and throttled processes * wait on a queue until kswapd makes progress and wakes them. There * is an affinity then between processes waking up and where reclaim * progress has been made assuming the process wakes on the same node. * More importantly, processes running on remote nodes will not compete * for remote pfmemalloc reserves and processes on different nodes * should make reasonable progress. 
*/ for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nodemask) { if (zone_idx(zone) > ZONE_NORMAL) continue; /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; if (allow_direct_reclaim(pgdat)) goto out; break; } /* If no zone was usable by the allocation flags then do not throttle */ if (!pgdat) goto out; /* Account for the throttling */ count_vm_event(PGSCAN_DIRECT_THROTTLE); /* * If the caller cannot enter the filesystem, it's possible that it * is due to the caller holding an FS lock or performing a journal * transaction in the case of a filesystem like ext[3|4]. In this case, * it is not safe to block on pfmemalloc_wait as kswapd could be * blocked waiting on the same lock. Instead, throttle for up to a * second before continuing. */ if (!(gfp_mask & __GFP_FS)) wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, allow_direct_reclaim(pgdat), HZ); else /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, allow_direct_reclaim(pgdat)); if (fatal_signal_pending(current)) return true; out: return false; } unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask) { unsigned long nr_reclaimed; struct scan_control sc = { .nr_to_reclaim = SWAP_CLUSTER_MAX, .gfp_mask = current_gfp_context(gfp_mask), .reclaim_idx = gfp_zone(gfp_mask), .order = order, .nodemask = nodemask, .priority = DEF_PRIORITY, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = 1, }; /* * scan_control uses s8 fields for order, priority, and reclaim_idx. * Confirm they are large enough for max values. */ BUILD_BUG_ON(MAX_PAGE_ORDER >= S8_MAX); BUILD_BUG_ON(DEF_PRIORITY > S8_MAX); BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX); /* * Do not enter reclaim if fatal signal was delivered while throttled. * 1 is returned so that the page allocator does not OOM kill at this * point. */ if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask)) return 1; set_task_reclaim_state(current, &sc.reclaim_state); trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); set_task_reclaim_state(current, NULL); return nr_reclaimed; } #ifdef CONFIG_MEMCG /* Only used by soft limit reclaim. Do not reuse for anything else. */ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, unsigned long *nr_scanned) { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); struct scan_control sc = { .nr_to_reclaim = SWAP_CLUSTER_MAX, .target_mem_cgroup = memcg, .may_writepage = !laptop_mode, .may_unmap = 1, .reclaim_idx = MAX_NR_ZONES - 1, .may_swap = !noswap, }; WARN_ON_ONCE(!current->reclaim_state); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, sc.gfp_mask); /* * NOTE: Although we can get the priority field, using it * here is not a good idea, since it limits the pages we can scan. * if we don't reclaim here, the shrink_node from balance_pgdat * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. 
*/ shrink_lruvec(lruvec, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); *nr_scanned = sc.nr_scanned; return sc.nr_reclaimed; } unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, unsigned int reclaim_options, int *swappiness) { unsigned long nr_reclaimed; unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), .proactive_swappiness = swappiness, .gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), .reclaim_idx = MAX_NR_ZONES - 1, .target_mem_cgroup = memcg, .priority = DEF_PRIORITY, .may_writepage = !laptop_mode, .may_unmap = 1, .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP), .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE), }; /* * Traverse the ZONELIST_FALLBACK zonelist of the current node to put * equal pressure on all the nodes. This is based on the assumption that * the reclaim does not bail out early. */ struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); set_task_reclaim_state(current, &sc.reclaim_state); trace_mm_vmscan_memcg_reclaim_begin(0, sc.gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); memalloc_noreclaim_restore(noreclaim_flag); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); set_task_reclaim_state(current, NULL); return nr_reclaimed; } #endif static void kswapd_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; struct lruvec *lruvec; if (lru_gen_enabled()) { lru_gen_age_node(pgdat, sc); return; } if (!can_age_anon_pages(pgdat, sc)) return; lruvec = mem_cgroup_lruvec(NULL, pgdat); if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON)) return; memcg = mem_cgroup_iter(NULL, NULL, NULL); do { lruvec = mem_cgroup_lruvec(memcg, pgdat); shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); } static bool pgdat_watermark_boosted(pg_data_t *pgdat, int highest_zoneidx) { int i; struct zone *zone; /* * Check for watermark boosts top-down as the higher zones * are more likely to be boosted. Both watermarks and boosts * should not be checked at the same time as reclaim would * start prematurely when there is no boosting and a lower * zone is balanced. */ for (i = highest_zoneidx; i >= 0; i--) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; if (zone->watermark_boost) return true; } return false; } /* * Returns true if there is an eligible zone balanced for the request order * and highest_zoneidx */ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) { int i; unsigned long mark = -1; struct zone *zone; /* * Check watermarks bottom-up as lower zones are more likely to * meet watermarks. */ for (i = 0; i <= highest_zoneidx; i++) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) mark = wmark_pages(zone, WMARK_PROMO); else mark = high_wmark_pages(zone); if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx)) return true; } /* * If a node has no managed zone within highest_zoneidx, it does not * need balancing by definition. This can happen if a zone-restricted * allocation tries to wake a remote kswapd. */ if (mark == -1) return true; return false; } /* Clear pgdat state for congested, dirty or under writeback. 
*/ static void clear_pgdat_congested(pg_data_t *pgdat) { struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat); clear_bit(LRUVEC_NODE_CONGESTED, &lruvec->flags); clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags); clear_bit(PGDAT_DIRTY, &pgdat->flags); clear_bit(PGDAT_WRITEBACK, &pgdat->flags); } /* * Prepare kswapd for sleeping. This verifies that there are no processes * waiting in throttle_direct_reclaim() and that watermarks have been met. * * Returns true if kswapd is ready to sleep */ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int highest_zoneidx) { /* * The throttled processes are normally woken up in balance_pgdat() as * soon as allow_direct_reclaim() is true. But there is a potential * race between when kswapd checks the watermarks and a process gets * throttled. There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the * zones, which causes kswapd to exit balance_pgdat() before reaching * the wake up checks. If kswapd is going to sleep, no process should * be sleeping on pfmemalloc_wait, so wake them now if necessary. If * the wake up is premature, processes will wake kswapd and get * throttled again. The difference from wake ups in balance_pgdat() is * that here we are under prepare_to_wait(). */ if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); /* Hopeless node, leave it to direct reclaim */ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) return true; if (pgdat_balanced(pgdat, order, highest_zoneidx)) { clear_pgdat_congested(pgdat); return true; } return false; } /* * kswapd shrinks a node of pages that are at or below the highest usable * zone that is currently unbalanced. * * Returns true if kswapd scanned at least the requested number of pages to * reclaim or if the lack of progress was due to pages under writeback. * This is used to determine if the scanning priority needs to be raised. */ static bool kswapd_shrink_node(pg_data_t *pgdat, struct scan_control *sc) { struct zone *zone; int z; unsigned long nr_reclaimed = sc->nr_reclaimed; /* Reclaim a number of pages proportional to the number of zones */ sc->nr_to_reclaim = 0; for (z = 0; z <= sc->reclaim_idx; z++) { zone = pgdat->node_zones + z; if (!managed_zone(zone)) continue; sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX); } /* * Historically care was taken to put equal pressure on all zones but * now pressure is applied based on node LRU order. */ shrink_node(pgdat, sc); /* * Fragmentation may mean that the system cannot be rebalanced for * high-order allocations. If twice the allocation size has been * reclaimed then recheck watermarks only at order-0 to prevent * excessive reclaim. Assume that a process that requested a high-order * allocation can direct reclaim/compact. */ if (sc->order && sc->nr_reclaimed >= compact_gap(sc->order)) sc->order = 0; /* account for progress from mm_account_reclaimed_pages() */ return max(sc->nr_scanned, sc->nr_reclaimed - nr_reclaimed) >= sc->nr_to_reclaim; } /* Page allocator PCP high watermark is lowered if reclaim is active.
*/ static inline void update_reclaim_active(pg_data_t *pgdat, int highest_zoneidx, bool active) { int i; struct zone *zone; for (i = 0; i <= highest_zoneidx; i++) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; if (active) set_bit(ZONE_RECLAIM_ACTIVE, &zone->flags); else clear_bit(ZONE_RECLAIM_ACTIVE, &zone->flags); } } static inline void set_reclaim_active(pg_data_t *pgdat, int highest_zoneidx) { update_reclaim_active(pgdat, highest_zoneidx, true); } static inline void clear_reclaim_active(pg_data_t *pgdat, int highest_zoneidx) { update_reclaim_active(pgdat, highest_zoneidx, false); } /* * For kswapd, balance_pgdat() will reclaim pages across a node from zones * that are eligible for use by the caller until at least one zone is * balanced. * * Returns the order kswapd finished reclaiming at. * * kswapd scans the zones in the highmem->normal->dma direction. It skips * zones which have free_pages > high_wmark_pages(zone), but once a zone is * found to have free_pages <= high_wmark_pages(zone), any page in that zone * or lower is eligible for reclaim until at least one usable zone is * balanced. */ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) { int i; unsigned long nr_soft_reclaimed; unsigned long nr_soft_scanned; unsigned long pflags; unsigned long nr_boost_reclaim; unsigned long zone_boosts[MAX_NR_ZONES] = { 0, }; bool boosted; struct zone *zone; struct scan_control sc = { .gfp_mask = GFP_KERNEL, .order = order, .may_unmap = 1, }; set_task_reclaim_state(current, &sc.reclaim_state); psi_memstall_enter(&pflags); __fs_reclaim_acquire(_THIS_IP_); count_vm_event(PAGEOUTRUN); /* * Account for the reclaim boost. Note that the zone boost is left in * place so that parallel allocations that are near the watermark will * stall or direct reclaim until kswapd is finished. */ nr_boost_reclaim = 0; for (i = 0; i <= highest_zoneidx; i++) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; nr_boost_reclaim += zone->watermark_boost; zone_boosts[i] = zone->watermark_boost; } boosted = nr_boost_reclaim; restart: set_reclaim_active(pgdat, highest_zoneidx); sc.priority = DEF_PRIORITY; do { unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; bool balanced; bool ret; bool was_frozen; sc.reclaim_idx = highest_zoneidx; /* * If the number of buffer_heads exceeds the maximum allowed * then consider reclaiming from all zones. This has a dual * purpose -- on 64-bit systems it is expected that * buffer_heads are stripped during active rotation. On 32-bit * systems, highmem pages can pin lowmem memory and shrinking * buffers can relieve lowmem pressure. Reclaim may still not * go ahead if all eligible zones for the original allocation * request are balanced to avoid excessive reclaim from kswapd. */ if (buffer_heads_over_limit) { for (i = MAX_NR_ZONES - 1; i >= 0; i--) { zone = pgdat->node_zones + i; if (!managed_zone(zone)) continue; sc.reclaim_idx = i; break; } } /* * If the pgdat is imbalanced then ignore boosting and preserve * the watermarks for a later time and restart. Note that the * zone watermarks will be still reset at the end of balancing * on the grounds that the normal reclaim should be enough to * re-evaluate if boosting is required when kswapd next wakes. */ balanced = pgdat_balanced(pgdat, sc.order, highest_zoneidx); if (!balanced && nr_boost_reclaim) { nr_boost_reclaim = 0; goto restart; } /* * If boosting is not active then only reclaim if there are no * eligible zones. 
Note that sc.reclaim_idx is not used as * buffer_heads_over_limit may have adjusted it. */ if (!nr_boost_reclaim && balanced) goto out; /* Limit the priority of boosting to avoid reclaim writeback */ if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2) raise_priority = false; /* * Do not writeback or swap pages for boosted reclaim. The * intent is to relieve pressure not issue sub-optimal IO * from reclaim context. If no pages are reclaimed, the * reclaim will be aborted. */ sc.may_writepage = !laptop_mode && !nr_boost_reclaim; sc.may_swap = !nr_boost_reclaim; /* * Do some background aging, to give pages a chance to be * referenced before reclaiming. All pages are rotated * regardless of classzone as this is about consistent aging. */ kswapd_age_node(pgdat, &sc); /* * If we're getting trouble reclaiming, start doing writepage * even in laptop mode. */ if (sc.priority < DEF_PRIORITY - 2) sc.may_writepage = 1; /* Call soft limit reclaim before calling shrink_node. */ sc.nr_scanned = 0; nr_soft_scanned = 0; nr_soft_reclaimed = memcg1_soft_limit_reclaim(pgdat, sc.order, sc.gfp_mask, &nr_soft_scanned); sc.nr_reclaimed += nr_soft_reclaimed; /* * There should be no need to raise the scanning priority if * enough pages are already being scanned that the high * watermark would be met at 100% efficiency. */ if (kswapd_shrink_node(pgdat, &sc)) raise_priority = false; /* * If the low watermark is met there is no need for processes * to be throttled on pfmemalloc_wait as they should now be * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ __fs_reclaim_release(_THIS_IP_); ret = kthread_freezable_should_stop(&was_frozen); __fs_reclaim_acquire(_THIS_IP_); if (was_frozen || ret) break; /* * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed); /* * If reclaim made no progress for a boost, stop reclaim as * IO cannot be queued and it could be an infinite loop in * extreme circumstances. */ if (nr_boost_reclaim && !nr_reclaimed) break; if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); /* * Restart only if it went through the priority loop all the way, * but cache_trim_mode didn't work. */ if (!sc.nr_reclaimed && sc.priority < 1 && !sc.no_cache_trim_mode && sc.cache_trim_mode_failed) { sc.no_cache_trim_mode = 1; goto restart; } if (!sc.nr_reclaimed) pgdat->kswapd_failures++; out: clear_reclaim_active(pgdat, highest_zoneidx); /* If reclaim was boosted, account for the reclaim done in this pass */ if (boosted) { unsigned long flags; for (i = 0; i <= highest_zoneidx; i++) { if (!zone_boosts[i]) continue; /* Increments are under the zone lock */ zone = pgdat->node_zones + i; spin_lock_irqsave(&zone->lock, flags); zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]); spin_unlock_irqrestore(&zone->lock, flags); } /* * As there is now likely space, wake up kcompactd to defragment * pageblocks. */ wakeup_kcompactd(pgdat, pageblock_order, highest_zoneidx); } snapshot_refaults(NULL, pgdat); __fs_reclaim_release(_THIS_IP_); psi_memstall_leave(&pflags); set_task_reclaim_state(current, NULL); /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account.
If another caller * entered the allocator slow path while kswapd was awake, order will * remain at the higher level. */ return sc.order; } /* * The pgdat->kswapd_highest_zoneidx is used to pass the highest zone index to * be reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is * not a valid index then either kswapd runs for first time or kswapd couldn't * sleep after previous reclaim attempt (node is still unbalanced). In that * case return the zone index of the previous kswapd reclaim cycle. */ static enum zone_type kswapd_highest_zoneidx(pg_data_t *pgdat, enum zone_type prev_highest_zoneidx) { enum zone_type curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); return curr_idx == MAX_NR_ZONES ? prev_highest_zoneidx : curr_idx; } static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order, unsigned int highest_zoneidx) { long remaining = 0; DEFINE_WAIT(wait); if (freezing(current) || kthread_should_stop()) return; prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); /* * Try to sleep for a short interval. Note that kcompactd will only be * woken if it is possible to sleep for a short interval. This is * deliberate on the assumption that if reclaim cannot keep an * eligible zone balanced that it's also unlikely that compaction will * succeed. */ if (prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) { /* * Compaction records what page blocks it recently failed to * isolate pages from and skips them in the future scanning. * When kswapd is going to sleep, it is reasonable to assume * that pages and compaction may succeed so reset the cache. */ reset_isolation_suitable(pgdat); /* * We have freed the memory, now we should compact it to make * allocation of the requested order possible. */ wakeup_kcompactd(pgdat, alloc_order, highest_zoneidx); remaining = schedule_timeout(HZ/10); /* * If woken prematurely then reset kswapd_highest_zoneidx and * order. The values will either be from a wakeup request or * the previous request that slept prematurely. */ if (remaining) { WRITE_ONCE(pgdat->kswapd_highest_zoneidx, kswapd_highest_zoneidx(pgdat, highest_zoneidx)); if (READ_ONCE(pgdat->kswapd_order) < reclaim_order) WRITE_ONCE(pgdat->kswapd_order, reclaim_order); } finish_wait(&pgdat->kswapd_wait, &wait); prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); } /* * After a short sleep, check if it was a premature sleep. If not, then * go fully to sleep until explicitly woken up. */ if (!remaining && prepare_kswapd_sleep(pgdat, reclaim_order, highest_zoneidx)) { trace_mm_vmscan_kswapd_sleep(pgdat->node_id); /* * vmstat counters are not perfectly accurate and the estimated * value for counters such as NR_FREE_PAGES can deviate from the * true value by nr_online_cpus * threshold. To avoid the zone * watermarks being breached while under pressure, we reduce the * per-cpu vmstat threshold while kswapd is awake and restore * them before going back to sleep. */ set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); if (!kthread_should_stop()) schedule(); set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); } else { if (remaining) count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY); else count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY); } finish_wait(&pgdat->kswapd_wait, &wait); } /* * The background pageout daemon, started as a kernel thread * from the init process. * * This basically trickles out pages so that we have _some_ * free memory available even if there is no other activity * that frees anything up. 
This is needed for things like routing * etc, where we otherwise might have all activity going on in * asynchronous contexts that cannot page things out. * * If there are applications that are active memory-allocators * (most normal use), this basically shouldn't matter. */ static int kswapd(void *p) { unsigned int alloc_order, reclaim_order; unsigned int highest_zoneidx = MAX_NR_ZONES - 1; pg_data_t *pgdat = (pg_data_t *)p; struct task_struct *tsk = current; const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); if (!cpumask_empty(cpumask)) set_cpus_allowed_ptr(tsk, cpumask); /* * Tell the memory management that we're a "memory allocator", * and that if we need more memory we should get access to it * regardless (see "__alloc_pages()"). "kswapd" should * never get caught in the normal page freeing logic. * * (Kswapd normally doesn't need memory anyway, but sometimes * you need a small amount of memory in order to be able to * page out something else, and this flag essentially protects * us from recursively trying to free more memory as we're * trying to free the first piece of memory in the first place). */ tsk->flags |= PF_MEMALLOC | PF_KSWAPD; set_freezable(); WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); atomic_set(&pgdat->nr_writeback_throttled, 0); for ( ; ; ) { bool was_frozen; alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order); highest_zoneidx = kswapd_highest_zoneidx(pgdat, highest_zoneidx); kswapd_try_sleep: kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order, highest_zoneidx); /* Read the new order and highest_zoneidx */ alloc_order = READ_ONCE(pgdat->kswapd_order); highest_zoneidx = kswapd_highest_zoneidx(pgdat, highest_zoneidx); WRITE_ONCE(pgdat->kswapd_order, 0); WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES); if (kthread_freezable_should_stop(&was_frozen)) break; /* * We can speed up thawing tasks if we don't call balance_pgdat * after returning from the refrigerator */ if (was_frozen) continue; /* * Reclaim begins at the requested order but if a high-order * reclaim fails then kswapd falls back to reclaiming for * order-0. If that happens, kswapd will consider sleeping * for the order it finished reclaiming at (reclaim_order) * but kcompactd is woken to compact for the original * request (alloc_order). */ trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx, alloc_order); reclaim_order = balance_pgdat(pgdat, alloc_order, highest_zoneidx); if (reclaim_order < alloc_order) goto kswapd_try_sleep; } tsk->flags &= ~(PF_MEMALLOC | PF_KSWAPD); return 0; } /* * A zone is low on free memory or too fragmented for high-order memory. If * kswapd should reclaim (direct reclaim is deferred), wake it up for the zone's * pgdat. It will wake up kcompactd after reclaiming memory. If kswapd reclaim * has failed or is not needed, still wake up kcompactd if only compaction is * needed. 
*/ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, enum zone_type highest_zoneidx) { pg_data_t *pgdat; enum zone_type curr_idx; if (!managed_zone(zone)) return; if (!cpuset_zone_allowed(zone, gfp_flags)) return; pgdat = zone->zone_pgdat; curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx); if (curr_idx == MAX_NR_ZONES || curr_idx < highest_zoneidx) WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx); if (READ_ONCE(pgdat->kswapd_order) < order) WRITE_ONCE(pgdat->kswapd_order, order); if (!waitqueue_active(&pgdat->kswapd_wait)) return; /* Hopeless node, leave it to direct reclaim if possible */ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES || (pgdat_balanced(pgdat, order, highest_zoneidx) && !pgdat_watermark_boosted(pgdat, highest_zoneidx))) { /* * There may be plenty of free memory available, but it's too * fragmented for high-order allocations. Wake up kcompactd * and rely on compaction_suitable() to determine if it's * needed. If it fails, it will defer subsequent attempts to * ratelimit its work. */ if (!(gfp_flags & __GFP_DIRECT_RECLAIM)) wakeup_kcompactd(pgdat, order, highest_zoneidx); return; } trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order, gfp_flags); wake_up_interruptible(&pgdat->kswapd_wait); } #ifdef CONFIG_HIBERNATION /* * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of * freed pages. * * Rather than trying to age LRUs the aim is to preserve the overall * LRU order by reclaiming preferentially * inactive > active > active referenced > active mapped */ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) { struct scan_control sc = { .nr_to_reclaim = nr_to_reclaim, .gfp_mask = GFP_HIGHUSER_MOVABLE, .reclaim_idx = MAX_NR_ZONES - 1, .priority = DEF_PRIORITY, .may_writepage = 1, .may_unmap = 1, .may_swap = 1, .hibernation_mode = 1, }; struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); unsigned long nr_reclaimed; unsigned int noreclaim_flag; fs_reclaim_acquire(sc.gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); set_task_reclaim_state(current, &sc.reclaim_state); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); set_task_reclaim_state(current, NULL); memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); return nr_reclaimed; } #endif /* CONFIG_HIBERNATION */ /* * This kswapd start function will be called by init and node-hot-add. */ void __meminit kswapd_run(int nid) { pg_data_t *pgdat = NODE_DATA(nid); pgdat_kswapd_lock(pgdat); if (!pgdat->kswapd) { pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); if (IS_ERR(pgdat->kswapd)) { /* failure at boot is fatal */ pr_err("Failed to start kswapd on node %d,ret=%ld\n", nid, PTR_ERR(pgdat->kswapd)); BUG_ON(system_state < SYSTEM_RUNNING); pgdat->kswapd = NULL; } } pgdat_kswapd_unlock(pgdat); } /* * Called by memory hotplug when all memory in a node is offlined. Caller must * be holding mem_hotplug_begin/done(). */ void __meminit kswapd_stop(int nid) { pg_data_t *pgdat = NODE_DATA(nid); struct task_struct *kswapd; pgdat_kswapd_lock(pgdat); kswapd = pgdat->kswapd; if (kswapd) { kthread_stop(kswapd); pgdat->kswapd = NULL; } pgdat_kswapd_unlock(pgdat); } static int __init kswapd_init(void) { int nid; swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); return 0; } module_init(kswapd_init) #ifdef CONFIG_NUMA /* * Node reclaim mode * * If non-zero call node_reclaim when the number of free pages falls below * the watermarks. 
*/ int node_reclaim_mode __read_mostly; /* * Priority for NODE_RECLAIM. This determines the fraction of pages * of a node considered for each zone_reclaim. 4 scans 1/16th of * a zone. */ #define NODE_RECLAIM_PRIORITY 4 /* * Percentage of pages in a zone that must be unmapped for node_reclaim to * occur. */ int sysctl_min_unmapped_ratio = 1; /* * If the number of slab pages in a zone grows beyond this percentage then * slab reclaim needs to occur. */ int sysctl_min_slab_ratio = 5; static inline unsigned long node_unmapped_file_pages(struct pglist_data *pgdat) { unsigned long file_mapped = node_page_state(pgdat, NR_FILE_MAPPED); unsigned long file_lru = node_page_state(pgdat, NR_INACTIVE_FILE) + node_page_state(pgdat, NR_ACTIVE_FILE); /* * It's possible for there to be more file mapped pages than * accounted for by the pages on the file LRU lists because * tmpfs pages accounted for as ANON can also be FILE_MAPPED */ return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0; } /* Work out how many page cache pages we can reclaim in this reclaim_mode */ static unsigned long node_pagecache_reclaimable(struct pglist_data *pgdat) { unsigned long nr_pagecache_reclaimable; unsigned long delta = 0; /* * If RECLAIM_UNMAP is set, then all file pages are considered * potentially reclaimable. Otherwise, we have to worry about * pages like swapcache and node_unmapped_file_pages() provides * a better estimate */ if (node_reclaim_mode & RECLAIM_UNMAP) nr_pagecache_reclaimable = node_page_state(pgdat, NR_FILE_PAGES); else nr_pagecache_reclaimable = node_unmapped_file_pages(pgdat); /* If we can't clean pages, remove dirty pages from consideration */ if (!(node_reclaim_mode & RECLAIM_WRITE)) delta += node_page_state(pgdat, NR_FILE_DIRTY); /* Watch for any possible underflows due to delta */ if (unlikely(delta > nr_pagecache_reclaimable)) delta = nr_pagecache_reclaimable; return nr_pagecache_reclaimable - delta; } /* * Try to free up some pages from this node through reclaim. */ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) { /* Minimum pages needed in order to stay on node */ const unsigned long nr_pages = 1 << order; struct task_struct *p = current; unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), .gfp_mask = current_gfp_context(gfp_mask), .order = order, .priority = NODE_RECLAIM_PRIORITY, .may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP), .may_swap = 1, .reclaim_idx = gfp_zone(gfp_mask), }; unsigned long pflags; trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, sc.gfp_mask); cond_resched(); psi_memstall_enter(&pflags); delayacct_freepages_start(); fs_reclaim_acquire(sc.gfp_mask); /* * We need to be able to allocate from the reserves for RECLAIM_UNMAP */ noreclaim_flag = memalloc_noreclaim_save(); set_task_reclaim_state(p, &sc.reclaim_state); if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages || node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) > pgdat->min_slab_pages) { /* * Free memory by calling shrink node with increasing * priorities until we have enough memory freed. 
*/ do { shrink_node(pgdat, &sc); } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } set_task_reclaim_state(p, NULL); memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); psi_memstall_leave(&pflags); delayacct_freepages_end(); trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed); return sc.nr_reclaimed >= nr_pages; } int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) { int ret; /* * Node reclaim reclaims unmapped file backed pages and * slab pages if we are over the defined limits. * * A small portion of unmapped file backed pages is needed for * file I/O otherwise pages read by file I/O will be immediately * thrown out if the node is overallocated. So we do not reclaim * if less than a specified percentage of the node is used by * unmapped file backed pages. */ if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; /* * Do not scan if the allocation should not be delayed. */ if (!gfpflags_allow_blocking(gfp_mask) || (current->flags & PF_MEMALLOC)) return NODE_RECLAIM_NOSCAN; /* * Only run node reclaim on the local node or on nodes that do not * have associated processors. This will favor the local processor * over remote processors and spread off node memory allocations * as wide as possible. */ if (node_state(pgdat->node_id, N_CPU) && pgdat->node_id != numa_node_id()) return NODE_RECLAIM_NOSCAN; if (test_and_set_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags)) return NODE_RECLAIM_NOSCAN; ret = __node_reclaim(pgdat, gfp_mask, order); clear_bit(PGDAT_RECLAIM_LOCKED, &pgdat->flags); if (!ret) count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED); return ret; } #endif /** * check_move_unevictable_folios - Move evictable folios to appropriate zone * lru list * @fbatch: Batch of lru folios to check. * * Checks folios for evictability, if an evictable folio is in the unevictable * lru list, moves it to the appropriate evictable lru list. This function * should be only used for lru folios. */ void check_move_unevictable_folios(struct folio_batch *fbatch) { struct lruvec *lruvec = NULL; int pgscanned = 0; int pgrescued = 0; int i; for (i = 0; i < fbatch->nr; i++) { struct folio *folio = fbatch->folios[i]; int nr_pages = folio_nr_pages(folio); pgscanned += nr_pages; /* block memcg migration while the folio moves between lrus */ if (!folio_test_clear_lru(folio)) continue; lruvec = folio_lruvec_relock_irq(folio, lruvec); if (folio_evictable(folio) && folio_test_unevictable(folio)) { lruvec_del_folio(lruvec, folio); folio_clear_unevictable(folio); lruvec_add_folio(lruvec, folio); pgrescued += nr_pages; } folio_set_lru(folio); } if (lruvec) { __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); unlock_page_lruvec_irq(lruvec); } else if (pgscanned) { count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); } } EXPORT_SYMBOL_GPL(check_move_unevictable_folios);
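The proportional rescaling performed in shrink_lruvec() earlier in this listing is easiest to see with concrete numbers. The following standalone user-space sketch is not kernel code: the variable names (target_anon, nr_anon, target_file, nr_file) and the example targets are made up for illustration only. It mirrors the arithmetic from that loop: once the smaller LRU type is about to be dropped, the remaining scan target of the other type is cut back to the same completion percentage, minus the scanning that has already been done.

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* Illustrative scan targets, as get_scan_count() might have set them */
	unsigned long target_anon = 99, nr_anon = 20;	/* anon is the smaller LRU */
	unsigned long target_file = 400, nr_file = 250;	/* file still has work left */

	/* Percentage of the anon target still outstanding (+1 avoids div by 0) */
	unsigned long percentage = nr_anon * 100 / (target_anon + 1);

	/* Stop scanning the smaller LRU entirely */
	nr_anon = 0;

	/* Rescale the file target to the same completion fraction as anon */
	unsigned long nr_scanned = target_file - nr_file;	/* already scanned */
	nr_file = target_file * (100 - percentage) / 100;	/* shrunk target */
	nr_file -= MIN(nr_file, nr_scanned);			/* minus work done */

	printf("anon remaining %lu, file scan target left: %lu\n", nr_anon, nr_file);
	return 0;
}

With these numbers the anon LRU had completed 80% of its target, so the file LRU ends up being scanned for 320 of its original 400 pages (150 already done plus a remaining target of 170), giving one proportional "nudge" rather than continuing to attack the larger list at full strength.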
// SPDX-License-Identifier: GPL-2.0-only /* * * Copyright (C) 2011 Novell Inc.
*/ #include <linux/fs.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/xattr.h> #include <linux/ratelimit.h> #include <linux/fiemap.h> #include <linux/fileattr.h> #include <linux/security.h> #include <linux/namei.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include "overlayfs.h" int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int err; struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool full_copy_up = false; struct dentry *upperdentry; const struct cred *old_cred; err = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (err) return err; if (attr->ia_valid & ATTR_SIZE) { /* Truncate should trigger data copy up as well */ full_copy_up = true; } if (!full_copy_up) err = ovl_copy_up(dentry); else err = ovl_copy_up_with_data(dentry); if (!err) { struct inode *winode = NULL; upperdentry = ovl_dentry_upper(dentry); if (attr->ia_valid & ATTR_SIZE) { winode = d_inode(upperdentry); err = get_write_access(winode); if (err) goto out; } if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) attr->ia_valid &= ~ATTR_MODE; /* * We might have to translate ovl file into real file object * once use cases emerge. For now, simply don't let underlying * filesystem rely on attr->ia_file */ attr->ia_valid &= ~ATTR_FILE; /* * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN * set. Overlayfs does not pass O_TRUNC flag to underlying * filesystem during open -> do not pass ATTR_OPEN. This * disables optimization in fuse which assumes open(O_TRUNC) * already set file size to 0. But we never passed O_TRUNC to * fuse. So by clearing ATTR_OPEN, fuse will be forced to send * setattr request to server. */ attr->ia_valid &= ~ATTR_OPEN; err = ovl_want_write(dentry); if (err) goto out_put_write; inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = ovl_do_notify_change(ofs, upperdentry, attr); revert_creds(old_cred); if (!err) ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); ovl_drop_write(dentry); out_put_write: if (winode) put_write_access(winode); } out: return err; } static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool samefs = ovl_same_fs(ofs); unsigned int xinobits = ovl_xino_bits(ofs); unsigned int xinoshift = 64 - xinobits; if (samefs) { /* * When all layers are on the same fs, all real inode * number are unique, so we use the overlay st_dev, * which is friendly to du -x. */ stat->dev = dentry->d_sb->s_dev; return; } else if (xinobits) { /* * All inode numbers of underlying fs should not be using the * high xinobits, so we use high xinobits to partition the * overlay st_ino address space. The high bits holds the fsid * (upper fsid is 0). The lowest xinobit is reserved for mapping * the non-persistent inode numbers range in case of overflow. * This way all overlay inode numbers are unique and use the * overlay st_dev. */ if (likely(!(stat->ino >> xinoshift))) { stat->ino |= ((u64)fsid) << (xinoshift + 1); stat->dev = dentry->d_sb->s_dev; return; } else if (ovl_xino_warn(ofs)) { pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", dentry, stat->ino, xinobits); } } /* The inode could not be mapped to a unified st_ino address space */ if (S_ISDIR(dentry->d_inode->i_mode)) { /* * Always use the overlay st_dev for directories, so 'find * -xdev' will scan the entire overlay mount and won't cross the * overlay mount boundaries. 
* * If not all layers are on the same fs the pair {real st_ino; * overlay st_dev} is not unique, so use the non persistent * overlay st_ino for directories. */ stat->dev = dentry->d_sb->s_dev; stat->ino = dentry->d_inode->i_ino; } else { /* * For non-samefs setup, if we cannot map all layers st_ino * to a unified address space, we need to make sure that st_dev * is unique per underlying fs, so we use the unique anonymous * bdev assigned to the underlying fs. */ stat->dev = ofs->fs[fsid].pseudo_dev; } } int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; enum ovl_path_type type; struct path realpath; const struct cred *old_cred; struct inode *inode = d_inode(dentry); bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; int err; bool metacopy_blocks = false; metacopy_blocks = ovl_is_metacopy_dentry(dentry); type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); err = ovl_do_getattr(&realpath, stat, request_mask, flags); if (err) goto out; /* Report the effective immutable/append-only STATX flags */ generic_fill_statx_attr(inode, stat); /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. * With xino feature and non-samefs, we use st_ino of the copy up * origin masked with high bits that represent the layer id. * * If lower filesystem supports NFS file handles, this also guaranties * persistent st_ino across mount cycle. */ if (!is_dir || ovl_same_dev(OVL_FS(dentry->d_sb))) { if (!OVL_TYPE_UPPER(type)) { fsid = ovl_layer_lower(dentry)->fsid; } else if (OVL_TYPE_ORIGIN(type)) { struct kstat lowerstat; u32 lowermask = STATX_INO | STATX_BLOCKS | (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); err = ovl_do_getattr(&realpath, &lowerstat, lowermask, flags); if (err) goto out; /* * Lower hardlinks may be broken on copy up to different * upper files, so we cannot use the lower origin st_ino * for those different files, even for the same fs case. * * Similarly, several redirected dirs can point to the * same dir on a lower layer. With the "verify_lower" * feature, we do not use the lower origin st_ino, if * we haven't verified that this redirect is unique. * * With inodes index enabled, it is safe to use st_ino * of an indexed origin. The index validates that the * upper hardlink is not broken and that a redirected * dir is the only redirect to that origin. */ if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || (!ovl_verify_lower(dentry->d_sb) && (is_dir || lowerstat.nlink == 1))) { fsid = ovl_layer_lower(dentry)->fsid; stat->ino = lowerstat.ino; } /* * If we are querying a metacopy dentry and lower * dentry is data dentry, then use the blocks we * queried just now. We don't have to do additional * vfs_getattr(). If lower itself is metacopy, then * additional vfs_getattr() is unavoidable. */ if (metacopy_blocks && realpath.dentry == ovl_dentry_lowerdata(dentry)) { stat->blocks = lowerstat.blocks; metacopy_blocks = false; } } if (metacopy_blocks) { /* * If lower is not same as lowerdata or if there was * no origin on upper, we can end up here. * With lazy lowerdata lookup, guess lowerdata blocks * from size to avoid lowerdata lookup on stat(2). 
*/ struct kstat lowerdatastat; u32 lowermask = STATX_BLOCKS; ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { err = ovl_do_getattr(&realpath, &lowerdatastat, lowermask, flags); if (err) goto out; } else { lowerdatastat.blocks = round_up(stat->size, stat->blksize) >> 9; } stat->blocks = lowerdatastat.blocks; } } ovl_map_dev_ino(dentry, stat, fsid); /* * It's probably not worth it to count subdirs to get the * correct link count. nlink=1 seems to pacify 'find' and * other utilities. */ if (is_dir && OVL_TYPE_MERGE(type)) stat->nlink = 1; /* * Return the overlay inode nlinks for indexed upper inodes. * Overlay inode nlink counts the union of the upper hardlinks * and non-covered lower hardlinks. It does not include the upper * index hardlink. */ if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) stat->nlink = dentry->d_inode->i_nlink; out: revert_creds(old_cred); return err; } int ovl_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct inode *upperinode = ovl_inode_upper(inode); struct inode *realinode; struct path realpath; const struct cred *old_cred; int err; /* Careful in RCU walk mode */ realinode = ovl_i_path_real(inode, &realpath); if (!realinode) { WARN_ON(!(mask & MAY_NOT_BLOCK)); return -ECHILD; } /* * Check overlay inode with the creds of task and underlying inode * with creds of mounter */ err = generic_permission(&nop_mnt_idmap, inode, mask); if (err) return err; old_cred = ovl_override_creds(inode->i_sb); if (!upperinode && !special_file(realinode->i_mode) && mask & MAY_WRITE) { mask &= ~(MAY_WRITE | MAY_APPEND); /* Make sure mounter can read file for copy up later */ mask |= MAY_READ; } err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask); revert_creds(old_cred); return err; } static const char *ovl_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { const struct cred *old_cred; const char *p; if (!dentry) return ERR_PTR(-ECHILD); old_cred = ovl_override_creds(dentry->d_sb); p = vfs_get_link(ovl_dentry_real(dentry), done); revert_creds(old_cred); return p; } #ifdef CONFIG_FS_POSIX_ACL /* * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone * of the POSIX ACLs retrieved from the lower layer to this function to not * alter the POSIX ACLs for the underlying filesystem. */ static void ovl_idmap_posix_acl(const struct inode *realinode, struct mnt_idmap *idmap, struct posix_acl *acl) { struct user_namespace *fs_userns = i_user_ns(realinode); for (unsigned int i = 0; i < acl->a_count; i++) { vfsuid_t vfsuid; vfsgid_t vfsgid; struct posix_acl_entry *e = &acl->a_entries[i]; switch (e->e_tag) { case ACL_USER: vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid); e->e_uid = vfsuid_into_kuid(vfsuid); break; case ACL_GROUP: vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid); e->e_gid = vfsgid_into_kgid(vfsgid); break; } } } /* * The @noperm argument is used to skip permission checking and is a temporary * measure. Quoting Miklos from an earlier discussion: * * > So there are two paths to getting an acl: * > 1) permission checking and 2) retrieving the value via getxattr(2). * > This is a similar situation as reading a symlink vs. following it. * > When following a symlink overlayfs always reads the link on the * > underlying fs just as if it was a readlink(2) call, calling * > security_inode_readlink() instead of security_inode_follow_link(). * > This is logical: we are reading the link from the underlying storage, * > and following it on overlayfs. 
* > * > Applying the same logic to acl: we do need to call the * > security_inode_getxattr() on the underlying fs, even if just want to * > check permissions on overlay. This is currently not done, which is an * > inconsistency. * > * > Maybe adding the check to ovl_get_acl() is the right way to go, but * > I'm a little afraid of a performance regression. Will look into that. * * Until we have made a decision allow this helper to take the @noperm * argument. We should hopefully be able to remove it soon. */ struct posix_acl *ovl_get_acl_path(const struct path *path, const char *acl_name, bool noperm) { struct posix_acl *real_acl, *clone; struct mnt_idmap *idmap; struct inode *realinode = d_inode(path->dentry); idmap = mnt_idmap(path->mnt); if (noperm) real_acl = get_inode_acl(realinode, posix_acl_type(acl_name)); else real_acl = vfs_get_acl(idmap, path->dentry, acl_name); if (IS_ERR_OR_NULL(real_acl)) return real_acl; if (!is_idmapped_mnt(path->mnt)) return real_acl; /* * We cannot alter the ACLs returned from the relevant layer as that * would alter the cached values filesystem wide for the lower * filesystem. Instead we can clone the ACLs and then apply the * relevant idmapping of the layer. */ clone = posix_acl_clone(real_acl, GFP_KERNEL); posix_acl_release(real_acl); /* release original acl */ if (!clone) return ERR_PTR(-ENOMEM); ovl_idmap_posix_acl(realinode, idmap, clone); return clone; } /* * When the relevant layer is an idmapped mount we need to take the idmapping * of the layer into account and translate any ACL_{GROUP,USER} values * according to the idmapped mount. * * We cannot alter the ACLs returned from the relevant layer as that would * alter the cached values filesystem wide for the lower filesystem. Instead we * can clone the ACLs and then apply the relevant idmapping of the layer. * * This is obviously only relevant when idmapped layers are used. */ struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu, bool noperm) { struct inode *realinode; struct posix_acl *acl; struct path realpath; /* Careful in RCU walk mode */ realinode = ovl_i_path_real(inode, &realpath); if (!realinode) { WARN_ON(!rcu); return ERR_PTR(-ECHILD); } if (!IS_POSIXACL(realinode)) return NULL; if (rcu) { /* * If the layer is idmapped drop out of RCU path walk * so we can clone the ACLs. */ if (is_idmapped_mnt(realpath.mnt)) return ERR_PTR(-ECHILD); acl = get_cached_acl_rcu(realinode, type); } else { const struct cred *old_cred; old_cred = ovl_override_creds(inode->i_sb); acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm); revert_creds(old_cred); } return acl; } static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, struct posix_acl *acl, int type) { int err; struct path realpath; const char *acl_name; const struct cred *old_cred; struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); /* * If ACL is to be removed from a lower file, check if it exists in * the first place before copying it up. 
*/ acl_name = posix_acl_xattr_name(type); if (!acl && !upperdentry) { struct posix_acl *real_acl; ovl_path_lower(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, acl_name); revert_creds(old_cred); if (IS_ERR(real_acl)) { err = PTR_ERR(real_acl); goto out; } posix_acl_release(real_acl); } if (!upperdentry) { err = ovl_copy_up(dentry); if (err) goto out; realdentry = ovl_dentry_upper(dentry); } err = ovl_want_write(dentry); if (err) goto out; old_cred = ovl_override_creds(dentry->d_sb); if (acl) err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); else err = ovl_do_remove_acl(ofs, realdentry, acl_name); revert_creds(old_cred); ovl_drop_write(dentry); /* copy c/mtime */ ovl_copyattr(inode); out: return err; } int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { int err; struct inode *inode = d_inode(dentry); struct dentry *workdir = ovl_workdir(dentry); struct inode *realinode = ovl_inode_real(inode); if (!IS_POSIXACL(d_inode(workdir))) return -EOPNOTSUPP; if (!realinode->i_op->set_acl) return -EOPNOTSUPP; if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) return -EPERM; /* * Check if sgid bit needs to be cleared (actual setacl operation will * be done with mounter's capabilities and so that won't do it for us). */ if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS && !in_group_p(inode->i_gid) && !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) { struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr); if (err) return err; } return ovl_set_or_remove_acl(dentry, inode, acl, type); } #endif int ovl_update_time(struct inode *inode, int flags) { if (flags & S_ATIME) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); struct path upperpath = { .mnt = ovl_upper_mnt(ofs), .dentry = ovl_upperdentry_dereference(OVL_I(inode)), }; if (upperpath.dentry) { touch_atime(&upperpath); inode_set_atime_to_ts(inode, inode_get_atime(d_inode(upperpath.dentry))); } } return 0; } static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { int err; struct inode *realinode = ovl_inode_realdata(inode); const struct cred *old_cred; if (!realinode) return -EIO; if (!realinode->i_op->fiemap) return -EOPNOTSUPP; old_cred = ovl_override_creds(inode->i_sb); err = realinode->i_op->fiemap(realinode, fieinfo, start, len); revert_creds(old_cred); return err; } /* * Work around the fact that security_file_ioctl() takes a file argument. * Introducing security_inode_fileattr_get/set() hooks would solve this issue * properly. */ static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa, bool set) { struct file *file; unsigned int cmd; int err; file = dentry_open(realpath, O_RDONLY, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); if (set) cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS; else cmd = fa->fsx_valid ? 
FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS; err = security_file_ioctl(file, cmd, 0); fput(file); return err; } int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) { int err; err = ovl_security_fileattr(realpath, fa, true); if (err) return err; return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa); } int ovl_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct path upperpath; const struct cred *old_cred; unsigned int flags; int err; err = ovl_copy_up(dentry); if (!err) { ovl_path_real(dentry, &upperpath); err = ovl_want_write(dentry); if (err) goto out; old_cred = ovl_override_creds(inode->i_sb); /* * Store immutable/append-only flags in xattr and clear them * in upper fileattr (in case they were set by older kernel) * so children of "ovl-immutable" directories lower aliases of * "ovl-immutable" hardlinks could be copied up. * Clear xattr when flags are cleared. */ err = ovl_set_protattr(inode, upperpath.dentry, fa); if (!err) err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); ovl_drop_write(dentry); /* * Merge real inode flags with inode flags read from * overlay.protattr xattr */ flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); /* Update ctime */ ovl_copyattr(inode); } out: return err; } /* Convert inode protection flags to fileattr flags */ static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) { BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); if (inode->i_flags & S_APPEND) { fa->flags |= FS_APPEND_FL; fa->fsx_xflags |= FS_XFLAG_APPEND; } if (inode->i_flags & S_IMMUTABLE) { fa->flags |= FS_IMMUTABLE_FL; fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; } } int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa) { int err; err = ovl_security_fileattr(realpath, fa, false); if (err) return err; err = vfs_fileattr_get(realpath->dentry, fa); if (err == -ENOIOCTLCMD) err = -ENOTTY; return err; } int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct path realpath; const struct cred *old_cred; int err; ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(inode->i_sb); err = ovl_real_fileattr_get(&realpath, fa); ovl_fileattr_prot_flags(inode, fa); revert_creds(old_cred); return err; } static const struct inode_operations ovl_file_inode_operations = { .setattr = ovl_setattr, .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_inode_acl = ovl_get_inode_acl, .get_acl = ovl_get_acl, .set_acl = ovl_set_acl, .update_time = ovl_update_time, .fiemap = ovl_fiemap, .fileattr_get = ovl_fileattr_get, .fileattr_set = ovl_fileattr_set, }; static const struct inode_operations ovl_symlink_inode_operations = { .setattr = ovl_setattr, .get_link = ovl_get_link, .getattr = ovl_getattr, .listxattr = ovl_listxattr, .update_time = ovl_update_time, }; static const struct inode_operations ovl_special_inode_operations = { .setattr = ovl_setattr, .permission = ovl_permission, .getattr = ovl_getattr, .listxattr = ovl_listxattr, .get_inode_acl = ovl_get_inode_acl, .get_acl = ovl_get_acl, .set_acl = ovl_set_acl, .update_time = ovl_update_time, }; static const struct address_space_operations ovl_aops = { /* For O_DIRECT 
dentry_open() checks f_mapping->a_ops->direct_IO */ .direct_IO = noop_direct_IO, }; /* * It is possible to stack overlayfs instance on top of another * overlayfs instance as lower layer. We need to annotate the * stackable i_mutex locks according to stack level of the super * block instance. An overlayfs instance can never be in stack * depth 0 (there is always a real fs below it). An overlayfs * inode lock will use the lockdep annotation ovl_i_mutex_key[depth]. * * For example, here is a snip from /proc/lockdep_chains after * dir_iterate of nested overlayfs: * * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) * [...] &type->i_mutex_dir_key (stack_depth=0) * * Locking order w.r.t ovl_want_write() is important for nested overlayfs. * * This chain is valid: * - inode->i_rwsem (inode_lock[2]) * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) * - OVL_I(inode)->lock (ovl_inode_lock[2]) * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) * * And this chain is valid: * - inode->i_rwsem (inode_lock[2]) * - OVL_I(inode)->lock (ovl_inode_lock[2]) * - lowerinode->i_rwsem (inode_lock[1]) * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) * * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is * held, because it is in reverse order of the non-nested case using the same * upper fs: * - inode->i_rwsem (inode_lock[1]) * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) * - OVL_I(inode)->lock (ovl_inode_lock[1]) */ #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) { #ifdef CONFIG_LOCKDEP static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; int depth = inode->i_sb->s_stack_depth - 1; if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) depth = 0; if (S_ISDIR(inode->i_mode)) lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); else lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); #endif } static void ovl_next_ino(struct inode *inode) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); inode->i_ino = atomic_long_inc_return(&ofs->last_ino); if (unlikely(!inode->i_ino)) inode->i_ino = atomic_long_inc_return(&ofs->last_ino); } static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); int xinobits = ovl_xino_bits(ofs); unsigned int xinoshift = 64 - xinobits; /* * When d_ino is consistent with st_ino (samefs or i_ino has enough * bits to encode layer), set the same value used for st_ino to i_ino, * so inode number exposed via /proc/locks and a like will be * consistent with d_ino and st_ino values. An i_ino value inconsistent * with d_ino also causes nfsd readdirplus to fail. */ inode->i_ino = ino; if (ovl_same_fs(ofs)) { return; } else if (xinobits && likely(!(ino >> xinoshift))) { inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); return; } /* * For directory inodes on non-samefs with xino disabled or xino * overflow, we allocate a non-persistent inode number, to be used for * resolving st_ino collisions in ovl_map_dev_ino(). * * To avoid ino collision with legitimate xino values from upper * layer (fsid 0), use the lowest xinobit to map the non * persistent inode numbers to the unified st_ino address space. 
*/ if (S_ISDIR(inode->i_mode)) { ovl_next_ino(inode); if (xinobits) { inode->i_ino &= ~0UL >> xinobits; inode->i_ino |= 1UL << xinoshift; } } } void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, unsigned long ino, int fsid) { struct inode *realinode; struct ovl_inode *oi = OVL_I(inode); oi->__upperdentry = oip->upperdentry; oi->oe = oip->oe; oi->redirect = oip->redirect; oi->lowerdata_redirect = oip->lowerdata_redirect; realinode = ovl_inode_real(inode); ovl_copyattr(inode); ovl_copyflags(realinode, inode); ovl_map_ino(inode, ino, fsid); } static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) { inode->i_mode = mode; inode->i_flags |= S_NOCMTIME; #ifdef CONFIG_FS_POSIX_ACL inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; #endif ovl_lockdep_annotate_inode_mutex_key(inode); switch (mode & S_IFMT) { case S_IFREG: inode->i_op = &ovl_file_inode_operations; inode->i_fop = &ovl_file_operations; inode->i_mapping->a_ops = &ovl_aops; break; case S_IFDIR: inode->i_op = &ovl_dir_inode_operations; inode->i_fop = &ovl_dir_operations; break; case S_IFLNK: inode->i_op = &ovl_symlink_inode_operations; break; default: inode->i_op = &ovl_special_inode_operations; init_special_inode(inode, mode, rdev); break; } } /* * With inodes index enabled, an overlay inode nlink counts the union of upper * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure * upper inode, the following nlink modifying operations can happen: * * 1. Lower hardlink copy up * 2. Upper hardlink created, unlinked or renamed over * 3. Lower hardlink whiteout or renamed over * * For the first, copy up case, the union nlink does not change, whether the * operation succeeds or fails, but the upper inode nlink may change. * Therefore, before copy up, we store the union nlink value relative to the * lower inode nlink in the index inode xattr .overlay.nlink. * * For the second, upper hardlink case, the union nlink should be incremented * or decremented IFF the operation succeeds, aligned with nlink change of the * upper inode. Therefore, before link/unlink/rename, we store the union nlink * value relative to the upper inode nlink in the index inode. * * For the last, lower cover up case, we simplify things by preceding the * whiteout or cover up with copy up. This makes sure that there is an index * upper inode where the nlink xattr can be stored before the copied up upper * entry is unlink. 
*/ #define OVL_NLINK_ADD_UPPER (1 << 0) /* * On-disk format for indexed nlink: * * nlink relative to the upper inode - "U[+-]NUM" * nlink relative to the lower inode - "L[+-]NUM" */ static int ovl_set_nlink_common(struct dentry *dentry, struct dentry *realdentry, const char *format) { struct inode *inode = d_inode(dentry); struct inode *realinode = d_inode(realdentry); char buf[13]; int len; len = snprintf(buf, sizeof(buf), format, (int) (inode->i_nlink - realinode->i_nlink)); if (WARN_ON(len >= sizeof(buf))) return -EIO; return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry), OVL_XATTR_NLINK, buf, len); } int ovl_set_nlink_upper(struct dentry *dentry) { return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); } int ovl_set_nlink_lower(struct dentry *dentry) { return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); } unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, struct dentry *upperdentry, unsigned int fallback) { int nlink_diff; int nlink; char buf[13]; int err; if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) return fallback; err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); if (err < 0) goto fail; buf[err] = '\0'; if ((buf[0] != 'L' && buf[0] != 'U') || (buf[1] != '+' && buf[1] != '-')) goto fail; err = kstrtoint(buf + 1, 10, &nlink_diff); if (err < 0) goto fail; nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; nlink += nlink_diff; if (nlink <= 0) goto fail; return nlink; fail: pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", upperdentry, err); return fallback; } struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) { struct inode *inode; inode = new_inode(sb); if (inode) ovl_fill_inode(inode, mode, rdev); return inode; } static int ovl_inode_test(struct inode *inode, void *data) { return inode->i_private == data; } static int ovl_inode_set(struct inode *inode, void *data) { inode->i_private = data; return 0; } static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, struct dentry *upperdentry, bool strict) { /* * For directories, @strict verify from lookup path performs consistency * checks, so NULL lower/upper in dentry must match NULL lower/upper in * inode. Non @strict verify from NFS handle decode path passes NULL for * 'unknown' lower/upper. */ if (S_ISDIR(inode->i_mode) && strict) { /* Real lower dir moved to upper layer under us? */ if (!lowerdentry && ovl_inode_lower(inode)) return false; /* Lookup of an uncovered redirect origin? */ if (!upperdentry && ovl_inode_upper(inode)) return false; } /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed * or hardlinked overlay dentry and lower dentry cannot be followed * by origin because lower fs does not support file handles. */ if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) return false; /* * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. * This happens when finding a lower alias for a copied up hard link. */ if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) return false; return true; } struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, bool is_upper) { struct inode *inode, *key = d_inode(real); inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); if (!inode) return NULL; if (!ovl_verify_inode(inode, is_upper ? NULL : real, is_upper ? 
real : NULL, false)) { iput(inode); return ERR_PTR(-ESTALE); } return inode; } bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) { struct inode *key = d_inode(dir); struct inode *trap; bool res; trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); if (!trap) return false; res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && !ovl_inode_lower(trap); iput(trap); return res; } /* * Create an inode cache entry for layer root dir, that will intentionally * fail ovl_verify_inode(), so any lookup that will find some layer root * will fail. */ struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) { struct inode *key = d_inode(dir); struct inode *trap; if (!d_is_dir(dir)) return ERR_PTR(-ENOTDIR); trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); if (!trap) return ERR_PTR(-ENOMEM); if (!(trap->i_state & I_NEW)) { /* Conflicting layer roots? */ iput(trap); return ERR_PTR(-ELOOP); } trap->i_mode = S_IFDIR; trap->i_flags = S_DEAD; unlock_new_inode(trap); return trap; } /* * Does overlay inode need to be hashed by lower inode? */ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, struct dentry *lower, bool index) { struct ovl_fs *ofs = OVL_FS(sb); /* No, if pure upper */ if (!lower) return false; /* Yes, if already indexed */ if (index) return true; /* Yes, if won't be copied up */ if (!ovl_upper_mnt(ofs)) return true; /* No, if lower hardlink is or will be broken on copy up */ if ((upper || !ovl_indexdir(sb)) && !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) return false; /* No, if non-indexed upper with NFS export */ if (ofs->config.nfs_export && upper) return false; /* Otherwise, hash by lower inode for fsnotify */ return true; } static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, struct inode *key) { return newinode ? inode_insert5(newinode, (unsigned long) key, ovl_inode_test, ovl_inode_set, key) : iget5_locked(sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); } struct inode *ovl_get_inode(struct super_block *sb, struct ovl_inode_params *oip) { struct ovl_fs *ofs = OVL_FS(sb); struct dentry *upperdentry = oip->upperdentry; struct ovl_path *lowerpath = ovl_lowerpath(oip->oe); struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; struct inode *inode; struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; struct path realpath = { .dentry = upperdentry ?: lowerdentry, .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt, }; bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, oip->index); int fsid = bylower ? lowerpath->layer->fsid : 0; bool is_dir; unsigned long ino = 0; int err = oip->newinode ? -EEXIST : -ENOMEM; if (!realinode) realinode = d_inode(lowerdentry); /* * Copy up origin (lower) may exist for non-indexed upper, but we must * not use lower as hash key if this is a broken hardlink. */ is_dir = S_ISDIR(realinode->i_mode); if (upperdentry || bylower) { struct inode *key = d_inode(bylower ? lowerdentry : upperdentry); unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = ovl_iget5(sb, oip->newinode, key); if (!inode) goto out_err; if (!(inode->i_state & I_NEW)) { /* * Verify that the underlying files stored in the inode * match those in the dentry. 
*/ if (!ovl_verify_inode(inode, lowerdentry, upperdentry, true)) { iput(inode); err = -ESTALE; goto out_err; } dput(upperdentry); ovl_free_entry(oip->oe); kfree(oip->redirect); kfree(oip->lowerdata_redirect); goto out; } /* Recalculate nlink for non-dir due to indexing */ if (!is_dir) nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); ino = key->i_ino; } else { /* Lower hardlink that will be broken on copy up */ inode = new_inode(sb); if (!inode) { err = -ENOMEM; goto out_err; } ino = realinode->i_ino; fsid = lowerpath->layer->fsid; } ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); ovl_inode_init(inode, oip, ino, fsid); if (upperdentry && ovl_is_impuredir(sb, upperdentry)) ovl_set_flag(OVL_IMPURE, inode); if (oip->index) ovl_set_flag(OVL_INDEX, inode); if (bylower) ovl_set_flag(OVL_CONST_INO, inode); /* Check for non-merge dir that may have whiteouts */ if (is_dir) { if (((upperdentry && lowerdentry) || ovl_numlower(oip->oe) > 1) || ovl_path_check_origin_xattr(ofs, &realpath)) { ovl_set_flag(OVL_WHITEOUTS, inode); } } /* Check for immutable/append-only inode flags in xattr */ if (upperdentry) ovl_check_protattr(inode, upperdentry); if (inode->i_state & I_NEW) unlock_new_inode(inode); out: return inode; out_err: pr_warn_ratelimited("failed to get inode (%i)\n", err); inode = ERR_PTR(err); goto out; }
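/*
 * A minimal userspace sketch of the xino packing described in the
 * ovl_map_dev_ino()/ovl_map_ino() comments above, under the assumptions
 * stated there: with non-samefs layers and xino enabled, the high xinobits
 * of st_ino carry the fsid (upper fsid is 0) and the lowest xinobit is
 * reserved for the non-persistent overflow range. The helper name
 * xino_pack and the sample values are hypothetical illustrations only,
 * not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t xino_pack(uint64_t real_ino, unsigned int fsid,
			  unsigned int xinobits)
{
	unsigned int xinoshift = 64 - xinobits;

	/* Real inode number fits below the xino bits: encode fsid on top. */
	if (!(real_ino >> xinoshift))
		return real_ino | ((uint64_t)fsid << (xinoshift + 1));

	/* Overflow: the caller would fall back to a non-persistent number. */
	return 0;
}

int main(void)
{
	/* e.g. xinobits=8, lower layer fsid=2, real inode number 12345 */
	printf("packed st_ino: 0x%llx\n",
	       (unsigned long long)xino_pack(12345, 2, 8));
	return 0;
}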
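/*
 * A minimal userspace sketch of the on-disk nlink encoding described for
 * ovl_set_nlink_common()/ovl_get_nlink() above: the union nlink is stored
 * as a signed delta relative to either the upper ("U") or lower ("L")
 * inode nlink, e.g. "U+1" or "L-2". The helper names and sample values are
 * hypothetical; this only illustrates the format, it is not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

static int nlink_encode(char *buf, size_t len, char which,
			unsigned int union_nlink, unsigned int real_nlink)
{
	/* Matches the "U%+i" / "L%+i" format described above. */
	return snprintf(buf, len, "%c%+i", which,
			(int)(union_nlink - real_nlink));
}

static int nlink_decode(const char *buf, unsigned int upper_nlink,
			unsigned int lower_nlink)
{
	/* Add the stored delta back onto the chosen real inode's nlink. */
	int diff = atoi(buf + 1);

	return (int)(buf[0] == 'L' ? lower_nlink : upper_nlink) + diff;
}

int main(void)
{
	char buf[13];

	/* Union nlink 3 stored relative to an upper inode with 2 links. */
	nlink_encode(buf, sizeof(buf), 'U', 3, 2);
	printf("%s -> %d\n", buf, nlink_decode(buf, 2, 0));
	return 0;
}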
// SPDX-License-Identifier: GPL-2.0-or-later /* Virtio ring implementation. * * Copyright 2007 Rusty Russell IBM Corporation */ #include <linux/virtio.h> #include <linux/virtio_ring.h> #include <linux/virtio_config.h> #include <linux/device.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/hrtimer.h> #include <linux/dma-mapping.h> #include <linux/kmsan.h> #include <linux/spinlock.h> #include <xen/xen.h> #ifdef DEBUG /* For development, we want to crash whenever the ring is screwed. */ #define BAD_RING(_vq, fmt, args...) \ do { \ dev_err(&(_vq)->vq.vdev->dev, \ "%s:"fmt, (_vq)->vq.name, ##args); \ BUG(); \ } while (0) /* Caller is supposed to guarantee no reentry. */ #define START_USE(_vq) \ do { \ if ((_vq)->in_use) \ panic("%s:in_use = %i\n", \ (_vq)->vq.name, (_vq)->in_use); \ (_vq)->in_use = __LINE__; \ } while (0) #define END_USE(_vq) \ do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) #define LAST_ADD_TIME_UPDATE(_vq) \ do { \ ktime_t now = ktime_get(); \ \ /* No kick or get, with .1 second between? Warn. */ \ if ((_vq)->last_add_time_valid) \ WARN_ON(ktime_to_ms(ktime_sub(now, \ (_vq)->last_add_time)) > 100); \ (_vq)->last_add_time = now; \ (_vq)->last_add_time_valid = true; \ } while (0) #define LAST_ADD_TIME_CHECK(_vq) \ do { \ if ((_vq)->last_add_time_valid) { \ WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ (_vq)->last_add_time)) > 100); \ } \ } while (0) #define LAST_ADD_TIME_INVALID(_vq) \ ((_vq)->last_add_time_valid = false) #else #define BAD_RING(_vq, fmt, args...) \ do { \ dev_err(&_vq->vq.vdev->dev, \ "%s:"fmt, (_vq)->vq.name, ##args); \ (_vq)->broken = true; \ } while (0) #define START_USE(vq) #define END_USE(vq) #define LAST_ADD_TIME_UPDATE(vq) #define LAST_ADD_TIME_CHECK(vq) #define LAST_ADD_TIME_INVALID(vq) #endif struct vring_desc_state_split { void *data; /* Data for callback. */ struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ }; struct vring_desc_state_packed { void *data; /* Data for callback. */ struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ u16 num; /* Descriptor list length. */ u16 last; /* The last desc state in a list. */ }; struct vring_desc_extra { dma_addr_t addr; /* Descriptor DMA addr. */ u32 len; /* Descriptor length. */ u16 flags; /* Descriptor flags. */ u16 next; /* The next desc state in a list. */ }; struct vring_virtqueue_split { /* Actual memory layout for this queue.
*/ struct vring vring; /* Last written value to avail->flags */ u16 avail_flags_shadow; /* * Last written value to avail->idx in * guest byte order. */ u16 avail_idx_shadow; /* Per-descriptor state. */ struct vring_desc_state_split *desc_state; struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t queue_dma_addr; size_t queue_size_in_bytes; /* * The parameters for creating vrings are reserved for creating new * vring. */ u32 vring_align; bool may_reduce_num; }; struct vring_virtqueue_packed { /* Actual memory layout for this queue. */ struct { unsigned int num; struct vring_packed_desc *desc; struct vring_packed_desc_event *driver; struct vring_packed_desc_event *device; } vring; /* Driver ring wrap counter. */ bool avail_wrap_counter; /* Avail used flags. */ u16 avail_used_flags; /* Index of the next avail descriptor. */ u16 next_avail_idx; /* * Last written value to driver->flags in * guest byte order. */ u16 event_flags_shadow; /* Per-descriptor state. */ struct vring_desc_state_packed *desc_state; struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t ring_dma_addr; dma_addr_t driver_event_dma_addr; dma_addr_t device_event_dma_addr; size_t ring_size_in_bytes; size_t event_size_in_bytes; }; struct vring_virtqueue { struct virtqueue vq; /* Is this a packed ring? */ bool packed_ring; /* Is DMA API used? */ bool use_dma_api; /* Can we use weak barriers? */ bool weak_barriers; /* Other side has made a mess, don't try any more. */ bool broken; /* Host supports indirect buffers */ bool indirect; /* Host publishes avail event idx */ bool event; /* Do DMA mapping by driver */ bool premapped; /* Do unmap or not for desc. Just when premapped is False and * use_dma_api is true, this is true. */ bool do_unmap; /* Head of free buffer list. */ unsigned int free_head; /* Number we've added since last sync. */ unsigned int num_added; /* Last used index we've seen. * for split ring, it just contains last used index * for packed ring: * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. */ u16 last_used_idx; /* Hint for event idx: already triggered no need to disable. */ bool event_triggered; union { /* Available for split ring */ struct vring_virtqueue_split split; /* Available for packed ring */ struct vring_virtqueue_packed packed; }; /* How to notify other side. FIXME: commonalize hcalls! */ bool (*notify)(struct virtqueue *vq); /* DMA, allocation, and size information */ bool we_own_ring; /* Device used for doing DMA */ struct device *dma_dev; #ifdef DEBUG /* They're supposed to lock for us. */ unsigned int in_use; /* Figure out if their kicks are too delayed. */ bool last_add_time_valid; ktime_t last_add_time; #endif }; static struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, bool weak_barriers, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev); static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); static void vring_free(struct virtqueue *_vq); /* * Helpers. */ #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq) static bool virtqueue_use_indirect(const struct vring_virtqueue *vq, unsigned int total_sg) { /* * If the host supports indirect descriptor tables, and we have multiple * buffers, then go indirect. 
FIXME: tune this threshold */ return (vq->indirect && total_sg > 1 && vq->vq.num_free); } /* * Modern virtio devices have feature bits to specify whether they need a * quirk and bypass the IOMMU. If not there, just use the DMA API. * * If there, the interaction between virtio and DMA API is messy. * * On most systems with virtio, physical addresses match bus addresses, * and it doesn't particularly matter whether we use the DMA API. * * On some systems, including Xen and any system with a physical device * that speaks virtio behind a physical IOMMU, we must use the DMA API * for virtio DMA to work at all. * * On other systems, including SPARC and PPC64, virtio-pci devices are * enumerated as though they are behind an IOMMU, but the virtio host * ignores the IOMMU, so we must either pretend that the IOMMU isn't * there or somehow map everything as the identity. * * For the time being, we preserve historic behavior and bypass the DMA * API. * * TODO: install a per-device DMA ops structure that does the right thing * taking into account all the above quirks, and use the DMA API * unconditionally on data path. */ static bool vring_use_dma_api(const struct virtio_device *vdev) { if (!virtio_has_dma_quirk(vdev)) return true; /* Otherwise, we are left to guess. */ /* * In theory, it's possible to have a buggy QEMU-supposed * emulated Q35 IOMMU and Xen enabled at the same time. On * such a configuration, virtio has never worked and will * not work without an even larger kludge. Instead, enable * the DMA API if we're a Xen guest, which at least allows * all of the sensible Xen configurations to work correctly. */ if (xen_domain()) return true; return false; } size_t virtio_max_dma_size(const struct virtio_device *vdev) { size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } EXPORT_SYMBOL_GPL(virtio_max_dma_size); static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, dma_addr_t *dma_handle, gfp_t flag, struct device *dma_dev) { if (vring_use_dma_api(vdev)) { return dma_alloc_coherent(dma_dev, size, dma_handle, flag); } else { void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); if (queue) { phys_addr_t phys_addr = virt_to_phys(queue); *dma_handle = (dma_addr_t)phys_addr; /* * Sanity check: make sure we dind't truncate * the address. The only arches I can find that * have 64-bit phys_addr_t but 32-bit dma_addr_t * are certain non-highmem MIPS and x86 * configurations, but these configurations * should never allocate physical pages above 32 * bits, so this is fine. Just in case, throw a * warning and abort if we end up with an * unrepresentable address. */ if (WARN_ON_ONCE(*dma_handle != phys_addr)) { free_pages_exact(queue, PAGE_ALIGN(size)); return NULL; } } return queue; } } static void vring_free_queue(struct virtio_device *vdev, size_t size, void *queue, dma_addr_t dma_handle, struct device *dma_dev) { if (vring_use_dma_api(vdev)) dma_free_coherent(dma_dev, size, queue, dma_handle); else free_pages_exact(queue, PAGE_ALIGN(size)); } /* * The DMA ops on various arches are rather gnarly right now, and * making all of the arch DMA ops work on the vring device itself * is a mess. */ static struct device *vring_dma_dev(const struct vring_virtqueue *vq) { return vq->dma_dev; } /* Map one sg entry. 
*/ static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg, enum dma_data_direction direction, dma_addr_t *addr) { if (vq->premapped) { *addr = sg_dma_address(sg); return 0; } if (!vq->use_dma_api) { /* * If DMA is not used, KMSAN doesn't know that the scatterlist * is initialized by the hardware. Explicitly check/unpoison it * depending on the direction. */ kmsan_handle_dma(sg_page(sg), sg->offset, sg->length, direction); *addr = (dma_addr_t)sg_phys(sg); return 0; } /* * We can't use dma_map_sg, because we don't use scatterlists in * the way it expects (we don't guarantee that the scatterlist * will exist for the lifetime of the mapping). */ *addr = dma_map_page(vring_dma_dev(vq), sg_page(sg), sg->offset, sg->length, direction); if (dma_mapping_error(vring_dma_dev(vq), *addr)) return -ENOMEM; return 0; } static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, void *cpu_addr, size_t size, enum dma_data_direction direction) { if (!vq->use_dma_api) return (dma_addr_t)virt_to_phys(cpu_addr); return dma_map_single(vring_dma_dev(vq), cpu_addr, size, direction); } static int vring_mapping_error(const struct vring_virtqueue *vq, dma_addr_t addr) { if (!vq->use_dma_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); } static void virtqueue_init(struct vring_virtqueue *vq, u32 num) { vq->vq.num_free = num; if (vq->packed_ring) vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); else vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; #endif } /* * Split ring specific functions - *_split(). */ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, const struct vring_desc *desc) { u16 flags; if (!vq->do_unmap) return; flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); dma_unmap_page(vring_dma_dev(vq), virtio64_to_cpu(vq->vq.vdev, desc->addr), virtio32_to_cpu(vq->vq.vdev, desc->len), (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, unsigned int i) { struct vring_desc_extra *extra = vq->split.desc_extra; u16 flags; flags = extra[i].flags; if (flags & VRING_DESC_F_INDIRECT) { if (!vq->use_dma_api) goto out; dma_unmap_single(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { if (!vq->do_unmap) goto out; dma_unmap_page(vring_dma_dev(vq), extra[i].addr, extra[i].len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } out: return extra[i].next; } static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) { struct vring_desc *desc; unsigned int i; /* * We require lowmem mappings for the descriptors because * otherwise virt_to_phys will give us bogus addresses in the * virtqueue. 
*/ gfp &= ~__GFP_HIGHMEM; desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); if (!desc) return NULL; for (i = 0; i < total_sg; i++) desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); return desc; } static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, struct vring_desc *desc, unsigned int i, dma_addr_t addr, unsigned int len, u16 flags, bool indirect) { struct vring_virtqueue *vring = to_vvq(vq); struct vring_desc_extra *extra = vring->split.desc_extra; u16 next; desc[i].flags = cpu_to_virtio16(vq->vdev, flags); desc[i].addr = cpu_to_virtio64(vq->vdev, addr); desc[i].len = cpu_to_virtio32(vq->vdev, len); if (!indirect) { next = extra[i].next; desc[i].next = cpu_to_virtio16(vq->vdev, next); extra[i].addr = addr; extra[i].len = len; extra[i].flags = flags; } else next = virtio16_to_cpu(vq->vdev, desc[i].next); return next; } static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct scatterlist *sg; struct vring_desc *desc; unsigned int i, n, avail, descs_used, prev, err_idx; int head; bool indirect; START_USE(vq); BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); return -EIO; } LAST_ADD_TIME_UPDATE(vq); BUG_ON(total_sg == 0); head = vq->free_head; if (virtqueue_use_indirect(vq, total_sg)) desc = alloc_indirect_split(_vq, total_sg, gfp); else { desc = NULL; WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); } if (desc) { /* Use a single buffer which doesn't continue */ indirect = true; /* Set up rest to use this indirect table. */ i = 0; descs_used = 1; } else { indirect = false; desc = vq->split.vring.desc; i = head; descs_used = total_sg; } if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); /* FIXME: for historical reasons, we force a notify here if * there are outgoing parts to the buffer. Presumably the * host should service the ring ASAP. */ if (out_sgs) vq->notify(&vq->vq); if (indirect) kfree(desc); END_USE(vq); return -ENOSPC; } for (n = 0; n < out_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, DMA_TO_DEVICE, &addr)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, VRING_DESC_F_NEXT, indirect); } } for (; n < (out_sgs + in_sgs); n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, DMA_FROM_DEVICE, &addr)) goto unmap_release; prev = i; /* Note that we trust indirect descriptor * table since it use stream DMA mapping. */ i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE, indirect); } } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->do_unmap) vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { /* Now that the indirect table is filled in, map it. 
*/ dma_addr_t addr = vring_map_single( vq, desc, total_sg * sizeof(struct vring_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) { if (vq->premapped) goto free_indirect; goto unmap_release; } virtqueue_add_desc_split(_vq, vq->split.vring.desc, head, addr, total_sg * sizeof(struct vring_desc), VRING_DESC_F_INDIRECT, false); } /* We're using some buffers from the free list. */ vq->vq.num_free -= descs_used; /* Update free pointer */ if (indirect) vq->free_head = vq->split.desc_extra[head].next; else vq->free_head = i; /* Store token and indirect buffer state. */ vq->split.desc_state[head].data = data; if (indirect) vq->split.desc_state[head].indir_desc = desc; else vq->split.desc_state[head].indir_desc = ctx; /* Put entry in available array (but don't update avail->idx until they * do sync). */ avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(vq->weak_barriers); vq->split.avail_idx_shadow++; vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->split.avail_idx_shadow); vq->num_added++; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); /* This is very unlikely, but theoretically possible. Kick * just in case. */ if (unlikely(vq->num_added == (1 << 16) - 1)) virtqueue_kick(_vq); return 0; unmap_release: err_idx = i; if (indirect) i = 0; else i = head; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; if (indirect) { vring_unmap_one_split_indirect(vq, &desc[i]); i = virtio16_to_cpu(_vq->vdev, desc[i].next); } else i = vring_unmap_one_split(vq, i); } free_indirect: if (indirect) kfree(desc); END_USE(vq); return -ENOMEM; } static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old; bool needs_kick; START_USE(vq); /* We need to expose available array entries before checking avail * event. */ virtio_mb(vq->weak_barriers); old = vq->split.avail_idx_shadow - vq->num_added; new = vq->split.avail_idx_shadow; vq->num_added = 0; LAST_ADD_TIME_CHECK(vq); LAST_ADD_TIME_INVALID(vq); if (vq->event) { needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->split.vring)), new, old); } else { needs_kick = !(vq->split.vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY)); } END_USE(vq); return needs_kick; } static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, void **ctx) { unsigned int i, j; __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); /* Clear data ptr. */ vq->split.desc_state[head].data = NULL; /* Put back on free list: unmap first-level descriptors and find end */ i = head; while (vq->split.vring.desc[i].flags & nextflag) { vring_unmap_one_split(vq, i); i = vq->split.desc_extra[i].next; vq->vq.num_free++; } vring_unmap_one_split(vq, i); vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ vq->vq.num_free++; if (vq->indirect) { struct vring_desc *indir_desc = vq->split.desc_state[head].indir_desc; u32 len; /* Free the indirect table, if any, now that it's unmapped. 
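 * The table length is recovered from desc_extra[head].len, where it was
 * stored when the table was mapped, and the individual entries are only
 * unmapped when vq->do_unmap is set.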
*/ if (!indir_desc) return; len = vq->split.desc_extra[head].len; BUG_ON(!(vq->split.desc_extra[head].flags & VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); if (vq->do_unmap) { for (j = 0; j < len / sizeof(struct vring_desc); j++) vring_unmap_one_split_indirect(vq, &indir_desc[j]); } kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; } else if (ctx) { *ctx = vq->split.desc_state[head].indir_desc; } } static bool more_used_split(const struct vring_virtqueue *vq) { return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->split.vring.used->idx); } static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); void *ret; unsigned int i; u16 last_used; START_USE(vq); if (unlikely(vq->broken)) { END_USE(vq); return NULL; } if (!more_used_split(vq)) { pr_debug("No more buffers in queue\n"); END_USE(vq); return NULL; } /* Only get used array entries after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); i = virtio32_to_cpu(_vq->vdev, vq->split.vring.used->ring[last_used].id); *len = virtio32_to_cpu(_vq->vdev, vq->split.vring.used->ring[last_used].len); if (unlikely(i >= vq->split.vring.num)) { BAD_RING(vq, "id %u out of range\n", i); return NULL; } if (unlikely(!vq->split.desc_state[i].data)) { BAD_RING(vq, "id %u is not a head!\n", i); return NULL; } /* detach_buf_split clears data, so grab it now. */ ret = vq->split.desc_state[i].data; detach_buf_split(vq, i, ctx); vq->last_used_idx++; /* If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) virtio_store_mb(vq->weak_barriers, &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); END_USE(vq); return ret; } static void virtqueue_disable_cb_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; /* * If device triggered an event already it won't trigger one again: * no need to disable. */ if (vq->event_triggered) return; if (vq->event) /* TODO: this is a hack. Figure out a cleaner value to write. */ vring_used_event(&vq->split.vring) = 0x0; else vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } } static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used_idx; START_USE(vq); /* We optimistically turn back on interrupts, then check if there was * more to do. */ /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always do both to keep code simple. 
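 *
 * The value returned below is a snapshot of last_used_idx; the caller gets
 * it back from virtqueue_enable_cb_prepare() and later hands it to
 * virtqueue_poll() to detect buffers used in the meantime, e.g.:
 *
 *	opaque = virtqueue_enable_cb_prepare(vq);
 *	if (virtqueue_poll(vq, opaque))
 *		... more buffers arrived; process them before sleeping ...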
*/ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); END_USE(vq); return last_used_idx; } static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx); } static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 bufs; START_USE(vq); /* We optimistically turn back on interrupts, then check if there was * more to do. */ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next * entry. Always update the event index to keep code simple. */ if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vq->split.vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->split.avail_flags_shadow); } /* TODO: tune this threshold */ bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; virtio_store_mb(vq->weak_barriers, &vring_used_event(&vq->split.vring), cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) - vq->last_used_idx) > bufs)) { END_USE(vq); return false; } END_USE(vq); return true; } static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i; void *buf; START_USE(vq); for (i = 0; i < vq->split.vring.num; i++) { if (!vq->split.desc_state[i].data) continue; /* detach_buf_split clears data, so grab it now. */ buf = vq->split.desc_state[i].data; detach_buf_split(vq, i, NULL); vq->split.avail_idx_shadow--; vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->split.avail_idx_shadow); END_USE(vq); return buf; } /* That should have freed everything. */ BUG_ON(vq->vq.num_free != vq->split.vring.num); END_USE(vq); return NULL; } static void virtqueue_vring_init_split(struct vring_virtqueue_split *vring_split, struct vring_virtqueue *vq) { struct virtio_device *vdev; vdev = vq->vq.vdev; vring_split->avail_flags_shadow = 0; vring_split->avail_idx_shadow = 0; /* No callback? Tell other side not to bother us. */ if (!vq->vq.callback) { vring_split->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; if (!vq->event) vring_split->vring.avail->flags = cpu_to_virtio16(vdev, vring_split->avail_flags_shadow); } } static void virtqueue_reinit_split(struct vring_virtqueue *vq) { int num; num = vq->split.vring.num; vq->split.vring.avail->flags = 0; vq->split.vring.avail->idx = 0; /* reset avail event */ vq->split.vring.avail->ring[num] = 0; vq->split.vring.used->flags = 0; vq->split.vring.used->idx = 0; /* reset used event */ *(__virtio16 *)&(vq->split.vring.used->ring[num]) = 0; virtqueue_init(vq, num); virtqueue_vring_init_split(&vq->split, vq); } static void virtqueue_vring_attach_split(struct vring_virtqueue *vq, struct vring_virtqueue_split *vring_split) { vq->split = *vring_split; /* Put everything in free lists. 
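 * vring_alloc_desc_extra() pre-linked the desc_extra array (entry i points
 * at i + 1), so starting free_head at 0 leaves the entire ring available
 * for allocation.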
*/ vq->free_head = 0; } static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) { struct vring_desc_state_split *state; struct vring_desc_extra *extra; u32 num = vring_split->vring.num; state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); if (!state) goto err_state; extra = vring_alloc_desc_extra(num); if (!extra) goto err_extra; memset(state, 0, num * sizeof(struct vring_desc_state_split)); vring_split->desc_state = state; vring_split->desc_extra = extra; return 0; err_extra: kfree(state); err_state: return -ENOMEM; } static void vring_free_split(struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, struct device *dma_dev) { vring_free_queue(vdev, vring_split->queue_size_in_bytes, vring_split->vring.desc, vring_split->queue_dma_addr, dma_dev); kfree(vring_split->desc_state); kfree(vring_split->desc_extra); } static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, u32 num, unsigned int vring_align, bool may_reduce_num, struct device *dma_dev) { void *queue = NULL; dma_addr_t dma_addr; /* We assume num is a power of 2. */ if (!is_power_of_2(num)) { dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); return -EINVAL; } /* TODO: allocate each queue chunk individually */ for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (queue) break; if (!may_reduce_num) return -ENOMEM; } if (!num) return -ENOMEM; if (!queue) { /* Try to get a single page. You are my only hope! */ queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, GFP_KERNEL | __GFP_ZERO, dma_dev); } if (!queue) return -ENOMEM; vring_init(&vring_split->vring, num, queue, vring_align); vring_split->queue_dma_addr = dma_addr; vring_split->queue_size_in_bytes = vring_size(num, vring_align); vring_split->vring_align = vring_align; vring_split->may_reduce_num = may_reduce_num; return 0; } static struct virtqueue *vring_create_virtqueue_split( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue_split vring_split = {}; struct virtqueue *vq; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, may_reduce_num, dma_dev); if (err) return NULL; vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, dma_dev); if (!vq) { vring_free_split(&vring_split, vdev, dma_dev); return NULL; } to_vvq(vq)->we_own_ring = true; return vq; } static int virtqueue_resize_split(struct virtqueue *_vq, u32 num) { struct vring_virtqueue_split vring_split = {}; struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = _vq->vdev; int err; err = vring_alloc_queue_split(&vring_split, vdev, num, vq->split.vring_align, vq->split.may_reduce_num, vring_dma_dev(vq)); if (err) goto err; err = vring_alloc_state_extra_split(&vring_split); if (err) goto err_state_extra; vring_free(&vq->vq); virtqueue_vring_init_split(&vring_split, vq); virtqueue_init(vq, vring_split.vring.num); virtqueue_vring_attach_split(vq, &vring_split); return 0; err_state_extra: vring_free_split(&vring_split, vdev, vring_dma_dev(vq)); err: virtqueue_reinit_split(vq); return -ENOMEM; } /* * Packed ring specific functions 
- *_packed(). */ static bool packed_used_wrap_counter(u16 last_used_idx) { return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } static u16 packed_last_used(u16 last_used_idx) { return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); } static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, const struct vring_desc_extra *extra) { u16 flags; flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { if (!vq->use_dma_api) return; dma_unmap_single(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { if (!vq->do_unmap) return; dma_unmap_page(vring_dma_dev(vq), extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } } static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, const struct vring_packed_desc *desc) { u16 flags; if (!vq->do_unmap) return; flags = le16_to_cpu(desc->flags); dma_unmap_page(vring_dma_dev(vq), le64_to_cpu(desc->addr), le32_to_cpu(desc->len), (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, gfp_t gfp) { struct vring_packed_desc *desc; /* * We require lowmem mappings for the descriptors because * otherwise virt_to_phys will give us bogus addresses in the * virtqueue. */ gfp &= ~__GFP_HIGHMEM; desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); return desc; } static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, gfp_t gfp) { struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, err_idx; u16 head, id; dma_addr_t addr; head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); if (!desc) return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); kfree(desc); END_USE(vq); return -ENOSPC; } i = 0; id = vq->free_head; BUG_ON(id == vq->packed.vring.num); for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { if (vring_map_one_sg(vq, sg, n < out_sgs ? DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; desc[i].flags = cpu_to_le16(n < out_sgs ? 0 : VRING_DESC_F_WRITE); desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); i++; } } /* Now that the indirect table is filled in, map it. */ addr = vring_map_single(vq, desc, total_sg * sizeof(struct vring_packed_desc), DMA_TO_DEVICE); if (vring_mapping_error(vq, addr)) { if (vq->premapped) goto free_desc; goto unmap_release; } vq->packed.vring.desc[head].addr = cpu_to_le64(addr); vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * sizeof(struct vring_packed_desc)); vq->packed.vring.desc[head].id = cpu_to_le16(id); if (vq->use_dma_api) { vq->packed.desc_extra[id].addr = addr; vq->packed.desc_extra[id].len = total_sg * sizeof(struct vring_packed_desc); vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | vq->packed.avail_used_flags; } /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising * the list are made available. */ virtio_wmb(vq->weak_barriers); vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | vq->packed.avail_used_flags); /* We're using some buffers from the free list. 
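 *
 * An indirect add consumes exactly one descriptor in the main ring no matter
 * how many scatterlist entries it describes, which is why num_free is only
 * decremented by one below and desc_state[id].num is set to 1.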
*/ vq->vq.num_free -= 1; /* Update free pointer */ n = head + 1; if (n >= vq->packed.vring.num) { n = 0; vq->packed.avail_wrap_counter ^= 1; vq->packed.avail_used_flags ^= 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } vq->packed.next_avail_idx = n; vq->free_head = vq->packed.desc_extra[id].next; /* Store token and indirect buffer state. */ vq->packed.desc_state[id].num = 1; vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = desc; vq->packed.desc_state[id].last = id; vq->num_added += 1; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); return 0; unmap_release: err_idx = i; for (i = 0; i < err_idx; i++) vring_unmap_desc_packed(vq, &desc[i]); free_desc: kfree(desc); END_USE(vq); return -ENOMEM; } static inline int virtqueue_add_packed(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); struct vring_packed_desc *desc; struct scatterlist *sg; unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; int err; START_USE(vq); BUG_ON(data == NULL); BUG_ON(ctx && vq->indirect); if (unlikely(vq->broken)) { END_USE(vq); return -EIO; } LAST_ADD_TIME_UPDATE(vq); BUG_ON(total_sg == 0); if (virtqueue_use_indirect(vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); if (err != -ENOMEM) { END_USE(vq); return err; } /* fall back on direct */ } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); desc = vq->packed.vring.desc; i = head; descs_used = total_sg; if (unlikely(vq->vq.num_free < descs_used)) { pr_debug("Can't add buf len %i - avail = %i\n", descs_used, vq->vq.num_free); END_USE(vq); return -ENOSPC; } id = vq->free_head; BUG_ON(id == vq->packed.vring.num); curr = id; c = 0; for (n = 0; n < out_sgs + in_sgs; n++) { for (sg = sgs[n]; sg; sg = sg_next(sg)) { dma_addr_t addr; if (vring_map_one_sg(vq, sg, n < out_sgs ? DMA_TO_DEVICE : DMA_FROM_DEVICE, &addr)) goto unmap_release; flags = cpu_to_le16(vq->packed.avail_used_flags | (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); if (i == head) head_flags = flags; else desc[i].flags = flags; desc[i].addr = cpu_to_le64(addr); desc[i].len = cpu_to_le32(sg->length); desc[i].id = cpu_to_le16(id); if (unlikely(vq->use_dma_api)) { vq->packed.desc_extra[curr].addr = addr; vq->packed.desc_extra[curr].len = sg->length; vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); } prev = curr; curr = vq->packed.desc_extra[curr].next; if ((unlikely(++i >= vq->packed.vring.num))) { i = 0; vq->packed.avail_used_flags ^= 1 << VRING_PACKED_DESC_F_AVAIL | 1 << VRING_PACKED_DESC_F_USED; } } } if (i <= head) vq->packed.avail_wrap_counter ^= 1; /* We're using some buffers from the free list. */ vq->vq.num_free -= descs_used; /* Update free pointer */ vq->packed.next_avail_idx = i; vq->free_head = curr; /* Store token. */ vq->packed.desc_state[id].num = descs_used; vq->packed.desc_state[id].data = data; vq->packed.desc_state[id].indir_desc = ctx; vq->packed.desc_state[id].last = prev; /* * A driver MUST NOT make the first descriptor in the list * available before all subsequent descriptors comprising * the list are made available. 
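 *
 * This is also why head_flags was only recorded in the loop above rather
 * than written: every other descriptor's flags are already in place, and
 * only after the write barrier below is the head descriptor's flags field
 * stored, exposing the whole chain to the device at once.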
*/ virtio_wmb(vq->weak_barriers); vq->packed.vring.desc[head].flags = head_flags; vq->num_added += descs_used; pr_debug("Added buffer head %i to %p\n", head, vq); END_USE(vq); return 0; unmap_release: err_idx = i; i = head; curr = vq->free_head; vq->packed.avail_used_flags = avail_used_flags; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; i++; if (i >= vq->packed.vring.num) i = 0; } END_USE(vq); return -EIO; } static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 new, old, off_wrap, flags, wrap_counter, event_idx; bool needs_kick; union { struct { __le16 off_wrap; __le16 flags; }; u32 u32; } snapshot; START_USE(vq); /* * We need to expose the new flags value before checking notification * suppressions. */ virtio_mb(vq->weak_barriers); old = vq->packed.next_avail_idx - vq->num_added; new = vq->packed.next_avail_idx; vq->num_added = 0; snapshot.u32 = *(u32 *)vq->packed.vring.device; flags = le16_to_cpu(snapshot.flags); LAST_ADD_TIME_CHECK(vq); LAST_ADD_TIME_INVALID(vq); if (flags != VRING_PACKED_EVENT_FLAG_DESC) { needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); goto out; } off_wrap = le16_to_cpu(snapshot.off_wrap); wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); if (wrap_counter != vq->packed.avail_wrap_counter) event_idx -= vq->packed.vring.num; needs_kick = vring_need_event(event_idx, new, old); out: END_USE(vq); return needs_kick; } static void detach_buf_packed(struct vring_virtqueue *vq, unsigned int id, void **ctx) { struct vring_desc_state_packed *state = NULL; struct vring_packed_desc *desc; unsigned int i, curr; state = &vq->packed.desc_state[id]; /* Clear data ptr. */ state->data = NULL; vq->packed.desc_extra[state->last].next = vq->free_head; vq->free_head = id; vq->vq.num_free += state->num; if (unlikely(vq->use_dma_api)) { curr = id; for (i = 0; i < state->num; i++) { vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; } } if (vq->indirect) { u32 len; /* Free the indirect table, if any, now that it's unmapped. 
*/ desc = state->indir_desc; if (!desc) return; if (vq->do_unmap) { len = vq->packed.desc_extra[id].len; for (i = 0; i < len / sizeof(struct vring_packed_desc); i++) vring_unmap_desc_packed(vq, &desc[i]); } kfree(desc); state->indir_desc = NULL; } else if (ctx) { *ctx = state->indir_desc; } } static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, u16 idx, bool used_wrap_counter) { bool avail, used; u16 flags; flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); return avail == used && used == used_wrap_counter; } static bool more_used_packed(const struct vring_virtqueue *vq) { u16 last_used; u16 last_used_idx; bool used_wrap_counter; last_used_idx = READ_ONCE(vq->last_used_idx); last_used = packed_last_used(last_used_idx); used_wrap_counter = packed_used_wrap_counter(last_used_idx); return is_used_desc_packed(vq, last_used, used_wrap_counter); } static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); u16 last_used, id, last_used_idx; bool used_wrap_counter; void *ret; START_USE(vq); if (unlikely(vq->broken)) { END_USE(vq); return NULL; } if (!more_used_packed(vq)) { pr_debug("No more buffers in queue\n"); END_USE(vq); return NULL; } /* Only get used elements after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); last_used_idx = READ_ONCE(vq->last_used_idx); used_wrap_counter = packed_used_wrap_counter(last_used_idx); last_used = packed_last_used(last_used_idx); id = le16_to_cpu(vq->packed.vring.desc[last_used].id); *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); if (unlikely(id >= vq->packed.vring.num)) { BAD_RING(vq, "id %u out of range\n", id); return NULL; } if (unlikely(!vq->packed.desc_state[id].data)) { BAD_RING(vq, "id %u is not a head!\n", id); return NULL; } /* detach_buf_packed clears data, so grab it now. */ ret = vq->packed.desc_state[id].data; detach_buf_packed(vq, id, ctx); last_used += vq->packed.desc_state[id].num; if (unlikely(last_used >= vq->packed.vring.num)) { last_used -= vq->packed.vring.num; used_wrap_counter ^= 1; } last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); WRITE_ONCE(vq->last_used_idx, last_used); /* * If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before * the read in the next get_buf call. */ if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) virtio_store_mb(vq->weak_barriers, &vq->packed.vring.driver->off_wrap, cpu_to_le16(vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); END_USE(vq); return ret; } static void virtqueue_disable_cb_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; /* * If device triggered an event already it won't trigger one again: * no need to disable. */ if (vq->event_triggered) return; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } } static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); START_USE(vq); /* * We optimistically turn back on interrupts, then check if there was * more to do. 
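 *
 * When event index is negotiated, the driver event suppression structure's
 * off_wrap field is written first and a write barrier is issued before the
 * flags are switched to VRING_PACKED_EVENT_FLAG_DESC; without event index
 * the flags simply become VRING_PACKED_EVENT_FLAG_ENABLE.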
*/ if (vq->event) { vq->packed.vring.driver->off_wrap = cpu_to_le16(vq->last_used_idx); /* * We need to update event offset and event wrap * counter first before updating event flags. */ virtio_wmb(vq->weak_barriers); } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = vq->event ? VRING_PACKED_EVENT_FLAG_DESC : VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } END_USE(vq); return vq->last_used_idx; } static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) { struct vring_virtqueue *vq = to_vvq(_vq); bool wrap_counter; u16 used_idx; wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); return is_used_desc_packed(vq, used_idx, wrap_counter); } static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 used_idx, wrap_counter, last_used_idx; u16 bufs; START_USE(vq); /* * We optimistically turn back on interrupts, then check if there was * more to do. */ if (vq->event) { /* TODO: tune this threshold */ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; last_used_idx = READ_ONCE(vq->last_used_idx); wrap_counter = packed_used_wrap_counter(last_used_idx); used_idx = packed_last_used(last_used_idx) + bufs; if (used_idx >= vq->packed.vring.num) { used_idx -= vq->packed.vring.num; wrap_counter ^= 1; } vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); /* * We need to update event offset and event wrap * counter first before updating event flags. */ virtio_wmb(vq->weak_barriers); } if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { vq->packed.event_flags_shadow = vq->event ? VRING_PACKED_EVENT_FLAG_DESC : VRING_PACKED_EVENT_FLAG_ENABLE; vq->packed.vring.driver->flags = cpu_to_le16(vq->packed.event_flags_shadow); } /* * We need to update event suppression structure first * before re-checking for more used buffers. */ virtio_mb(vq->weak_barriers); last_used_idx = READ_ONCE(vq->last_used_idx); wrap_counter = packed_used_wrap_counter(last_used_idx); used_idx = packed_last_used(last_used_idx); if (is_used_desc_packed(vq, used_idx, wrap_counter)) { END_USE(vq); return false; } END_USE(vq); return true; } static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); unsigned int i; void *buf; START_USE(vq); for (i = 0; i < vq->packed.vring.num; i++) { if (!vq->packed.desc_state[i].data) continue; /* detach_buf clears data, so grab it now. */ buf = vq->packed.desc_state[i].data; detach_buf_packed(vq, i, NULL); END_USE(vq); return buf; } /* That should have freed everything. 
*/ BUG_ON(vq->vq.num_free != vq->packed.vring.num); END_USE(vq); return NULL; } static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) { struct vring_desc_extra *desc_extra; unsigned int i; desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), GFP_KERNEL); if (!desc_extra) return NULL; memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); for (i = 0; i < num - 1; i++) desc_extra[i].next = i + 1; return desc_extra; } static void vring_free_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, struct device *dma_dev) { if (vring_packed->vring.desc) vring_free_queue(vdev, vring_packed->ring_size_in_bytes, vring_packed->vring.desc, vring_packed->ring_dma_addr, dma_dev); if (vring_packed->vring.driver) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.driver, vring_packed->driver_event_dma_addr, dma_dev); if (vring_packed->vring.device) vring_free_queue(vdev, vring_packed->event_size_in_bytes, vring_packed->vring.device, vring_packed->device_event_dma_addr, dma_dev); kfree(vring_packed->desc_state); kfree(vring_packed->desc_extra); } static int vring_alloc_queue_packed(struct vring_virtqueue_packed *vring_packed, struct virtio_device *vdev, u32 num, struct device *dma_dev) { struct vring_packed_desc *ring; struct vring_packed_desc_event *driver, *device; dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; ring_size_in_bytes = num * sizeof(struct vring_packed_desc); ring = vring_alloc_queue(vdev, ring_size_in_bytes, &ring_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!ring) goto err; vring_packed->vring.desc = ring; vring_packed->ring_dma_addr = ring_dma_addr; vring_packed->ring_size_in_bytes = ring_size_in_bytes; event_size_in_bytes = sizeof(struct vring_packed_desc_event); driver = vring_alloc_queue(vdev, event_size_in_bytes, &driver_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!driver) goto err; vring_packed->vring.driver = driver; vring_packed->event_size_in_bytes = event_size_in_bytes; vring_packed->driver_event_dma_addr = driver_event_dma_addr; device = vring_alloc_queue(vdev, event_size_in_bytes, &device_event_dma_addr, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, dma_dev); if (!device) goto err; vring_packed->vring.device = device; vring_packed->device_event_dma_addr = device_event_dma_addr; vring_packed->vring.num = num; return 0; err: vring_free_packed(vring_packed, vdev, dma_dev); return -ENOMEM; } static int vring_alloc_state_extra_packed(struct vring_virtqueue_packed *vring_packed) { struct vring_desc_state_packed *state; struct vring_desc_extra *extra; u32 num = vring_packed->vring.num; state = kmalloc_array(num, sizeof(struct vring_desc_state_packed), GFP_KERNEL); if (!state) goto err_desc_state; memset(state, 0, num * sizeof(struct vring_desc_state_packed)); extra = vring_alloc_desc_extra(num); if (!extra) goto err_desc_extra; vring_packed->desc_state = state; vring_packed->desc_extra = extra; return 0; err_desc_extra: kfree(state); err_desc_state: return -ENOMEM; } static void virtqueue_vring_init_packed(struct vring_virtqueue_packed *vring_packed, bool callback) { vring_packed->next_avail_idx = 0; vring_packed->avail_wrap_counter = 1; vring_packed->event_flags_shadow = 0; vring_packed->avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; /* No callback? Tell other side not to bother us. 
*/ if (!callback) { vring_packed->event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; vring_packed->vring.driver->flags = cpu_to_le16(vring_packed->event_flags_shadow); } } static void virtqueue_vring_attach_packed(struct vring_virtqueue *vq, struct vring_virtqueue_packed *vring_packed) { vq->packed = *vring_packed; /* Put everything in free lists. */ vq->free_head = 0; } static void virtqueue_reinit_packed(struct vring_virtqueue *vq) { memset(vq->packed.vring.device, 0, vq->packed.event_size_in_bytes); memset(vq->packed.vring.driver, 0, vq->packed.event_size_in_bytes); /* we need to reset the desc.flags. For more, see is_used_desc_packed() */ memset(vq->packed.vring.desc, 0, vq->packed.ring_size_in_bytes); virtqueue_init(vq, vq->packed.vring.num); virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback); } static struct virtqueue *vring_create_virtqueue_packed( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue_packed vring_packed = {}; struct vring_virtqueue *vq; int err; if (vring_alloc_queue_packed(&vring_packed, vdev, num, dma_dev)) goto err_ring; vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) goto err_vq; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; vq->vq.reset = false; vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; #else vq->broken = false; #endif vq->packed_ring = true; vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; err = vring_alloc_state_extra_packed(&vring_packed); if (err) goto err_state_extra; virtqueue_vring_init_packed(&vring_packed, !!callback); virtqueue_init(vq, num); virtqueue_vring_attach_packed(vq, &vring_packed); spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_state_extra: kfree(vq); err_vq: vring_free_packed(&vring_packed, vdev, dma_dev); err_ring: return NULL; } static int virtqueue_resize_packed(struct virtqueue *_vq, u32 num) { struct vring_virtqueue_packed vring_packed = {}; struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = _vq->vdev; int err; if (vring_alloc_queue_packed(&vring_packed, vdev, num, vring_dma_dev(vq))) goto err_ring; err = vring_alloc_state_extra_packed(&vring_packed); if (err) goto err_state_extra; vring_free(&vq->vq); virtqueue_vring_init_packed(&vring_packed, !!vq->vq.callback); virtqueue_init(vq, vring_packed.vring.num); virtqueue_vring_attach_packed(vq, &vring_packed); return 0; err_state_extra: vring_free_packed(&vring_packed, vdev, vring_dma_dev(vq)); err_ring: virtqueue_reinit_packed(vq); return -ENOMEM; } static int virtqueue_disable_and_recycle(struct virtqueue *_vq, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = vq->vq.vdev; void *buf; int err; if (!vq->we_own_ring) return -EPERM; if (!vdev->config->disable_vq_and_reset) return -ENOENT; if (!vdev->config->enable_vq_after_reset) return -ENOENT; 
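	/* Reset the queue first, then hand every buffer that was still queued
	 * back to the driver through the recycle callback.
	 */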
err = vdev->config->disable_vq_and_reset(_vq); if (err) return err; while ((buf = virtqueue_detach_unused_buf(_vq)) != NULL) recycle(_vq, buf); return 0; } static int virtqueue_enable_after_reset(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); struct virtio_device *vdev = vq->vq.vdev; if (vdev->config->enable_vq_after_reset(_vq)) return -EBUSY; return 0; } /* * Generic functions and exported symbols. */ static inline int virtqueue_add(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, unsigned int out_sgs, unsigned int in_sgs, void *data, void *ctx, gfp_t gfp) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, out_sgs, in_sgs, data, ctx, gfp) : virtqueue_add_split(_vq, sgs, total_sg, out_sgs, in_sgs, data, ctx, gfp); } /** * virtqueue_add_sgs - expose buffers to other end * @_vq: the struct virtqueue we're talking about. * @sgs: array of terminated scatterlists. * @out_sgs: the number of scatterlists readable by other side * @in_sgs: the number of scatterlists which are writable (after readable ones) * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_sgs(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int out_sgs, unsigned int in_sgs, void *data, gfp_t gfp) { unsigned int i, total_sg = 0; /* Count them first. */ for (i = 0; i < out_sgs + in_sgs; i++) { struct scatterlist *sg; for (sg = sgs[i]; sg; sg = sg_next(sg)) total_sg++; } return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_sgs); /** * virtqueue_add_outbuf - expose output buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg readable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_outbuf(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); /** * virtqueue_add_inbuf - expose input buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg writable by other side * @data: the token identifying the buffer. * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); /** * virtqueue_add_inbuf_ctx - expose input buffers to other end * @vq: the struct virtqueue we're talking about. * @sg: scatterlist (must be well-formed and terminated!) * @num: the number of entries in @sg writable by other side * @data: the token identifying the buffer. 
* @ctx: extra context for the token * @gfp: how to do memory allocations (if necessary). * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). */ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, struct scatterlist *sg, unsigned int num, void *data, void *ctx, gfp_t gfp) { return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); } EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); /** * virtqueue_dma_dev - get the dma dev * @_vq: the struct virtqueue we're talking about. * * Returns the dma dev. That can been used for dma api. */ struct device *virtqueue_dma_dev(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->use_dma_api) return vring_dma_dev(vq); else return NULL; } EXPORT_SYMBOL_GPL(virtqueue_dma_dev); /** * virtqueue_kick_prepare - first half of split virtqueue_kick call. * @_vq: the struct virtqueue * * Instead of virtqueue_kick(), you can do: * if (virtqueue_kick_prepare(vq)) * virtqueue_notify(vq); * * This is sometimes useful because the virtqueue_kick_prepare() needs * to be serialized, but the actual virtqueue_notify() call does not. */ bool virtqueue_kick_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : virtqueue_kick_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); /** * virtqueue_notify - second half of split virtqueue_kick call. * @_vq: the struct virtqueue * * This does not need to be serialized. * * Returns false if host notify failed or queue is broken, otherwise true. */ bool virtqueue_notify(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (unlikely(vq->broken)) return false; /* Prod other side to tell it about changes. */ if (!vq->notify(_vq)) { vq->broken = true; return false; } return true; } EXPORT_SYMBOL_GPL(virtqueue_notify); /** * virtqueue_kick - update after add_buf * @vq: the struct virtqueue * * After one or more virtqueue_add_* calls, invoke this to kick * the other side. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). * * Returns false if kick failed, otherwise true. */ bool virtqueue_kick(struct virtqueue *vq) { if (virtqueue_kick_prepare(vq)) return virtqueue_notify(vq); return true; } EXPORT_SYMBOL_GPL(virtqueue_kick); /** * virtqueue_get_buf_ctx - get the next used buffer * @_vq: the struct virtqueue we're talking about. * @len: the length written into the buffer * @ctx: extra context for the token * * If the device wrote data into the buffer, @len will be set to the * amount written. This means you don't need to clear the buffer * beforehand to ensure there's no data leakage in the case of short * writes. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). * * Returns NULL if there are no used buffers, or the "data" token * handed to virtqueue_add_*(). */ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? 
virtqueue_get_buf_ctx_packed(_vq, len, ctx) : virtqueue_get_buf_ctx_split(_vq, len, ctx); } EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) { return virtqueue_get_buf_ctx(_vq, len, NULL); } EXPORT_SYMBOL_GPL(virtqueue_get_buf); /** * virtqueue_disable_cb - disable callbacks * @_vq: the struct virtqueue we're talking about. * * Note that this is not necessarily synchronous, hence unreliable and only * useful as an optimization. * * Unlike other operations, this need not be serialized. */ void virtqueue_disable_cb(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->packed_ring) virtqueue_disable_cb_packed(_vq); else virtqueue_disable_cb_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); /** * virtqueue_enable_cb_prepare - restart callbacks after disable_cb * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks; it returns current queue state * in an opaque unsigned value. This value should be later tested by * virtqueue_poll, to detect a possible race between the driver checking for * more work, and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) vq->event_triggered = false; return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : virtqueue_enable_cb_prepare_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); /** * virtqueue_poll - query pending used buffers * @_vq: the struct virtqueue we're talking about. * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). * * Returns "true" if there are pending used buffers in the queue. * * This does not need to be serialized. */ bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) { struct vring_virtqueue *vq = to_vvq(_vq); if (unlikely(vq->broken)) return false; virtio_mb(vq->weak_barriers); return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : virtqueue_poll_split(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_poll); /** * virtqueue_enable_cb - restart callbacks after disable_cb. * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ bool virtqueue_enable_cb(struct virtqueue *_vq) { unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); return !virtqueue_poll(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); /** * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. * @_vq: the struct virtqueue we're talking about. * * This re-enables callbacks but hints to the other side to delay * interrupts until most of the available buffers have been processed; * it returns "false" if there are many pending buffers in the queue, * to detect a possible race between the driver checking for more work, * and enabling callbacks. * * Caller must ensure we don't call this with other virtqueue * operations at the same time (except where noted). */ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->event_triggered) vq->event_triggered = false; return vq->packed_ring ? 
virtqueue_enable_cb_delayed_packed(_vq) : virtqueue_enable_cb_delayed_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); /** * virtqueue_detach_unused_buf - detach first unused buffer * @_vq: the struct virtqueue we're talking about. * * Returns NULL or the "data" token handed to virtqueue_add_*(). * This is not valid on an active queue; it is useful for device * shutdown or the reset queue. */ void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : virtqueue_detach_unused_buf_split(_vq); } EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); static inline bool more_used(const struct vring_virtqueue *vq) { return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); } /** * vring_interrupt - notify a virtqueue on an interrupt * @irq: the IRQ number (ignored) * @_vq: the struct virtqueue to notify * * Calls the callback function of @_vq to process the virtqueue * notification. */ irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (!more_used(vq)) { pr_debug("virtqueue interrupt with no work for %p\n", vq); return IRQ_NONE; } if (unlikely(vq->broken)) { #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION dev_warn_once(&vq->vq.vdev->dev, "virtio vring IRQ raised before DRIVER_OK"); return IRQ_NONE; #else return IRQ_HANDLED; #endif } /* Just a hint for performance: so it's ok that this can be racy! */ if (vq->event) vq->event_triggered = true; pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); if (vq->vq.callback) vq->vq.callback(&vq->vq); return IRQ_HANDLED; } EXPORT_SYMBOL_GPL(vring_interrupt); /* Only available for split ring */ static struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring_virtqueue_split *vring_split, struct virtio_device *vdev, bool weak_barriers, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { struct vring_virtqueue *vq; int err; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return NULL; vq = kmalloc(sizeof(*vq), GFP_KERNEL); if (!vq) return NULL; vq->packed_ring = false; vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.name = name; vq->vq.index = index; vq->vq.reset = false; vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; #else vq->broken = false; #endif vq->dma_dev = dma_dev; vq->use_dma_api = vring_use_dma_api(vdev); vq->premapped = false; vq->do_unmap = vq->use_dma_api; vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && !context; vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) vq->weak_barriers = false; err = vring_alloc_state_extra_split(vring_split); if (err) { kfree(vq); return NULL; } virtqueue_vring_init_split(vring_split, vq); virtqueue_init(vq, vring_split->vring.num); virtqueue_vring_attach_split(vq, vring_split); spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); spin_unlock(&vdev->vqs_list_lock); return &vq->vq; } struct virtqueue *vring_create_virtqueue( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return 
vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, vdev->dev.parent); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_create_virtqueue); struct virtqueue *vring_create_virtqueue_dma( unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, bool context, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name, struct device *dma_dev) { if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return vring_create_virtqueue_packed(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, dma_dev); return vring_create_virtqueue_split(index, num, vring_align, vdev, weak_barriers, may_reduce_num, context, notify, callback, name, dma_dev); } EXPORT_SYMBOL_GPL(vring_create_virtqueue_dma); /** * virtqueue_resize - resize the vring of vq * @_vq: the struct virtqueue we're talking about. * @num: new ring num * @recycle: callback to recycle unused buffers * * When it is really necessary to create a new vring, it will set the current vq * into the reset state. Then call the passed callback to recycle the buffer * that is no longer used. Only after the new vring is successfully created, the * old vring will be released. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -ENOMEM: Failed to allocate a new ring, fall back to the original ring size. * vq can still work normally * -EBUSY: Failed to sync with device, vq may not work properly * -ENOENT: Transport or device not supported * -E2BIG/-EINVAL: num error * -EPERM: Operation not permitted * */ int virtqueue_resize(struct virtqueue *_vq, u32 num, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); int err; if (num > vq->vq.num_max) return -E2BIG; if (!num) return -EINVAL; if ((vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num) == num) return 0; err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; if (vq->packed_ring) err = virtqueue_resize_packed(_vq, num); else err = virtqueue_resize_split(_vq, num); return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_resize); /** * virtqueue_set_dma_premapped - set the vring premapped mode * @_vq: the struct virtqueue we're talking about. * * Enable the premapped mode of the vq. * * The vring in premapped mode does not do dma internally, so the driver must * do dma mapping in advance. The driver must pass the dma_address through * dma_address of scatterlist. When the driver got a used buffer from * the vring, it has to unmap the dma address. * * This function must be called immediately after creating the vq, or after vq * reset, and before adding any buffers to it. * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -EINVAL: too late to enable premapped mode, the vq already contains buffers. */ int virtqueue_set_dma_premapped(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u32 num; START_USE(vq); num = vq->packed_ring ? 
vq->packed.vring.num : vq->split.vring.num; if (num != vq->vq.num_free) { END_USE(vq); return -EINVAL; } vq->premapped = true; vq->do_unmap = false; END_USE(vq); return 0; } EXPORT_SYMBOL_GPL(virtqueue_set_dma_premapped); /** * virtqueue_reset - detach and recycle all unused buffers * @_vq: the struct virtqueue we're talking about. * @recycle: callback to recycle unused buffers * * Caller must ensure we don't call this with other virtqueue operations * at the same time (except where noted). * * Returns zero or a negative error. * 0: success. * -EBUSY: Failed to sync with device, vq may not work properly * -ENOENT: Transport or device not supported * -EPERM: Operation not permitted */ int virtqueue_reset(struct virtqueue *_vq, void (*recycle)(struct virtqueue *vq, void *buf)) { struct vring_virtqueue *vq = to_vvq(_vq); int err; err = virtqueue_disable_and_recycle(_vq, recycle); if (err) return err; if (vq->packed_ring) virtqueue_reinit_packed(vq); else virtqueue_reinit_split(vq); return virtqueue_enable_after_reset(_vq); } EXPORT_SYMBOL_GPL(virtqueue_reset); /* Only available for split ring */ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int num, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, bool context, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name) { struct vring_virtqueue_split vring_split = {}; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) return NULL; vring_init(&vring_split.vring, num, pages, vring_align); return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, context, notify, callback, name, vdev->dev.parent); } EXPORT_SYMBOL_GPL(vring_new_virtqueue); static void vring_free(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, vq->packed.ring_size_in_bytes, vq->packed.vring.desc, vq->packed.ring_dma_addr, vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.driver, vq->packed.driver_event_dma_addr, vring_dma_dev(vq)); vring_free_queue(vq->vq.vdev, vq->packed.event_size_in_bytes, vq->packed.vring.device, vq->packed.device_event_dma_addr, vring_dma_dev(vq)); kfree(vq->packed.desc_state); kfree(vq->packed.desc_extra); } else { vring_free_queue(vq->vq.vdev, vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr, vring_dma_dev(vq)); } } if (!vq->packed_ring) { kfree(vq->split.desc_state); kfree(vq->split.desc_extra); } } void vring_del_virtqueue(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); spin_lock(&vq->vq.vdev->vqs_list_lock); list_del(&_vq->list); spin_unlock(&vq->vq.vdev->vqs_list_lock); vring_free(_vq); kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); u32 vring_notification_data(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); u16 next; if (vq->packed_ring) next = (vq->packed.next_avail_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR))) | vq->packed.avail_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR; else next = vq->split.avail_idx_shadow; return next << 16 | _vq->index; } EXPORT_SYMBOL_GPL(vring_notification_data); /* Manipulates transport-specific feature bits. 
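 * Any transport feature bit the ring code does not understand is cleared
 * with __virtio_clear_bit() below, so unknown transport features are never
 * accepted on behalf of the driver.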
*/ void vring_transport_features(struct virtio_device *vdev) { unsigned int i; for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; case VIRTIO_RING_F_EVENT_IDX: break; case VIRTIO_F_VERSION_1: break; case VIRTIO_F_ACCESS_PLATFORM: break; case VIRTIO_F_RING_PACKED: break; case VIRTIO_F_ORDER_PLATFORM: break; case VIRTIO_F_NOTIFICATION_DATA: break; default: /* We don't understand this bit. */ __virtio_clear_bit(vdev, i); } } } EXPORT_SYMBOL_GPL(vring_transport_features); /** * virtqueue_get_vring_size - return the size of the virtqueue's vring * @_vq: the struct virtqueue containing the vring of interest. * * Returns the size of the vring. This is mainly used for boasting to * userspace. Unlike other operations, this need not be serialized. */ unsigned int virtqueue_get_vring_size(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; } EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); /* * This function should only be called by the core, not directly by the driver. */ void __virtqueue_break(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, true); } EXPORT_SYMBOL_GPL(__virtqueue_break); /* * This function should only be called by the core, not directly by the driver. */ void __virtqueue_unbreak(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, false); } EXPORT_SYMBOL_GPL(__virtqueue_unbreak); bool virtqueue_is_broken(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); /* * This should prevent the device from being used, allowing drivers to * recover. You may need to grab appropriate locks to flush. */ void virtio_break_device(struct virtio_device *dev) { struct virtqueue *_vq; spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, true); } spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(virtio_break_device); /* * This should allow the device to be used by the driver. You may * need to grab appropriate locks to flush the write to * vq->broken. This should only be used in some specific case e.g * (probing and restoring). This function should only be called by the * core, not directly by the driver. */ void __virtio_unbreak_device(struct virtio_device *dev) { struct virtqueue *_vq; spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ WRITE_ONCE(vq->broken, false); } spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(__virtio_unbreak_device); dma_addr_t virtqueue_get_desc_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.ring_dma_addr; return vq->split.queue_dma_addr; } EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.driver_event_dma_addr; return vq->split.queue_dma_addr + ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); dma_addr_t virtqueue_get_used_addr(const struct virtqueue *_vq) { const struct vring_virtqueue *vq = to_vvq(_vq); BUG_ON(!vq->we_own_ring); if (vq->packed_ring) return vq->packed.device_event_dma_addr; return vq->split.queue_dma_addr + ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); } EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); /* Only available for split ring */ const struct vring *virtqueue_get_vring(const struct virtqueue *vq) { return &to_vvq(vq)->split.vring; } EXPORT_SYMBOL_GPL(virtqueue_get_vring); /** * virtqueue_dma_map_single_attrs - map DMA for _vq * @_vq: the struct virtqueue we're talking about. * @ptr: the pointer of the buffer to do dma * @size: the size of the buffer to do dma * @dir: DMA direction * @attrs: DMA Attrs * * The caller calls this to do dma mapping in advance. The DMA address can be * passed to this _vq when it is in pre-mapped mode. * * return DMA address. Caller should check that by virtqueue_dma_mapping_error(). */ dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) { kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir); return (dma_addr_t)virt_to_phys(ptr); } return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_dma_map_single_attrs); /** * virtqueue_dma_unmap_single_attrs - unmap DMA for _vq * @_vq: the struct virtqueue we're talking about. * @addr: the dma address to unmap * @size: the size of the buffer * @dir: DMA direction * @attrs: DMA Attrs * * Unmap the address that is mapped by the virtqueue_dma_map_* APIs. * */ void virtqueue_dma_unmap_single_attrs(struct virtqueue *_vq, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return; dma_unmap_single_attrs(vring_dma_dev(vq), addr, size, dir, attrs); } EXPORT_SYMBOL_GPL(virtqueue_dma_unmap_single_attrs); /** * virtqueue_dma_mapping_error - check dma address * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * * Returns 0 means dma valid. Other means invalid dma address. */ int virtqueue_dma_mapping_error(struct virtqueue *_vq, dma_addr_t addr) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return 0; return dma_mapping_error(vring_dma_dev(vq), addr); } EXPORT_SYMBOL_GPL(virtqueue_dma_mapping_error); /** * virtqueue_dma_need_sync - check a dma address needs sync * @_vq: the struct virtqueue we're talking about. 
* @addr: DMA address * * Check if the dma address mapped by the virtqueue_dma_map_* APIs needs to be * synchronized * * return bool */ bool virtqueue_dma_need_sync(struct virtqueue *_vq, dma_addr_t addr) { struct vring_virtqueue *vq = to_vvq(_vq); if (!vq->use_dma_api) return false; return dma_need_sync(vring_dma_dev(vq), addr); } EXPORT_SYMBOL_GPL(virtqueue_dma_need_sync); /** * virtqueue_dma_sync_single_range_for_cpu - dma sync for cpu * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * * Before calling this function, use virtqueue_dma_need_sync() to confirm that * the DMA address really needs to be synchronized * */ void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) return; dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_cpu); /** * virtqueue_dma_sync_single_range_for_device - dma sync for device * @_vq: the struct virtqueue we're talking about. * @addr: DMA address * @offset: DMA address offset * @size: buf size for sync * @dir: DMA direction * * Before calling this function, use virtqueue_dma_need_sync() to confirm that * the DMA address really needs to be synchronized */ void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { struct vring_virtqueue *vq = to_vvq(_vq); struct device *dev = vring_dma_dev(vq); if (!vq->use_dma_api) return; dma_sync_single_range_for_device(dev, addr, offset, size, dir); } EXPORT_SYMBOL_GPL(virtqueue_dma_sync_single_range_for_device); MODULE_DESCRIPTION("Virtio ring implementation"); MODULE_LICENSE("GPL");
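The premapped-DMA helpers above (virtqueue_set_dma_premapped(), virtqueue_dma_map_single_attrs() and friends) move ownership of the DMA mapping from the ring into the driver. The following is a minimal driver-side sketch of that flow, not part of virtio_ring.c; the names my_vq, my_buf, MY_BUF_LEN and my_post_premapped_buf() are hypothetical and error handling is reduced to the essentials.

/* Illustrative driver-side sketch only: exercises the premapped flow
 * documented above.  my_vq / my_buf / MY_BUF_LEN are hypothetical names. */
#include <linux/virtio.h>
#include <linux/scatterlist.h>
#include <linux/dma-direction.h>

#define MY_BUF_LEN 4096

static int my_post_premapped_buf(struct virtqueue *my_vq, void *my_buf)
{
	struct scatterlist sg;
	dma_addr_t addr;
	int err;

	/* Enable premapped mode once, while the ring is still empty
	 * (normally done right after the vq is created, e.g. in probe()). */
	err = virtqueue_set_dma_premapped(my_vq);
	if (err)
		return err;

	/* The driver now performs the mapping itself ... */
	addr = virtqueue_dma_map_single_attrs(my_vq, my_buf, MY_BUF_LEN,
					      DMA_FROM_DEVICE, 0);
	if (virtqueue_dma_mapping_error(my_vq, addr))
		return -ENOMEM;

	/* ... and hands the DMA address to the ring via the scatterlist. */
	sg_init_one(&sg, my_buf, MY_BUF_LEN);
	sg.dma_address = addr;

	err = virtqueue_add_inbuf(my_vq, &sg, 1, my_buf, GFP_ATOMIC);
	if (err) {
		virtqueue_dma_unmap_single_attrs(my_vq, addr, MY_BUF_LEN,
						 DMA_FROM_DEVICE, 0);
		return err;
	}

	virtqueue_kick(my_vq);
	return 0;
}

When the buffer later comes back from virtqueue_get_buf(), the driver is also responsible for the matching virtqueue_dma_unmap_single_attrs() call, as the documentation of virtqueue_set_dma_premapped() above requires.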
// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level.
* * Generic INET transport hashtables * * Authors: Lotsa people, from code originally in tcp */ #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/vmalloc.h> #include <linux/memblock.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/inet6_hashtables.h> #endif #include <net/secure_seq.h> #include <net/hotdata.h> #include <net/ip.h> #include <net/tcp.h> #include <net/sock_reuseport.h> u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); return __inet_ehashfn(laddr, lport, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet_ehashfn); /* This function handles inet_sock, but also timewait and request sockets * for IPv4/IPv6. */ static u32 sk_ehashfn(const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) return inet6_ehashfn(sock_net(sk), &sk->sk_v6_rcv_saddr, sk->sk_num, &sk->sk_v6_daddr, sk->sk_dport); #endif return inet_ehashfn(sock_net(sk), sk->sk_rcv_saddr, sk->sk_num, sk->sk_daddr, sk->sk_dport); } /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum, int l3mdev) { struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb) { write_pnet(&tb->ib_net, net); tb->l3mdev = l3mdev; tb->port = snum; tb->fastreuse = 0; tb->fastreuseport = 0; INIT_HLIST_HEAD(&tb->bhash2); hlist_add_head(&tb->node, &head->chain); } return tb; } /* * Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->bhash2)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } } bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net, unsigned short port, int l3mdev) { return net_eq(ib_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev; } static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2, struct net *net, struct inet_bind_hashbucket *head, struct inet_bind_bucket *tb, const struct sock *sk) { write_pnet(&tb2->ib_net, net); tb2->l3mdev = tb->l3mdev; tb2->port = tb->port; #if IS_ENABLED(CONFIG_IPV6) BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED)); if (sk->sk_family == AF_INET6) { tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr; } else { tb2->addr_type = IPV6_ADDR_MAPPED; ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr); } #else tb2->rcv_saddr = sk->sk_rcv_saddr; #endif INIT_HLIST_HEAD(&tb2->owners); hlist_add_head(&tb2->node, &head->chain); hlist_add_head(&tb2->bhash_node, &tb->bhash2); } struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, struct inet_bind_bucket *tb, const struct sock *sk) { struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb2) inet_bind2_bucket_init(tb2, net, head, tb, sk); return tb2; } /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, 
struct inet_bind2_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); __hlist_del(&tb->bhash_node); kmem_cache_free(cachep, tb); } } static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); if (tb2->addr_type != IPV6_ADDR_MAPPED) return false; #endif return tb2->rcv_saddr == sk->sk_rcv_saddr; } void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port) { inet_sk(sk)->inet_num = port; inet_csk(sk)->icsk_bind_hash = tb; inet_csk(sk)->icsk_bind2_hash = tb2; sk_add_bind_node(sk, &tb2->owners); } /* * Get rid of any references to a local port held by the given sock. */ static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); struct inet_bind_bucket *tb; int bhash; bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size); head = &hashinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num); spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->inet_num = 0; spin_lock(&head2->lock); if (inet_csk(sk)->icsk_bind2_hash) { struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash; __sk_del_bind_node(sk); inet_csk(sk)->icsk_bind2_hash = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); } spin_unlock(&head2->lock); inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } void inet_put_port(struct sock *sk) { local_bh_disable(); __inet_put_port(sk); local_bh_enable(); } EXPORT_SYMBOL(inet_put_port); int __inet_inherit_port(const struct sock *sk, struct sock *child) { struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk); unsigned short port = inet_sk(child)->inet_num; struct inet_bind_hashbucket *head, *head2; bool created_inet_bind_bucket = false; struct net *net = sock_net(sk); bool update_fastreuse = false; struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; int bhash, l3mdev; bhash = inet_bhashfn(net, port, table->bhash_size); head = &table->bhash[bhash]; head2 = inet_bhashfn_portaddr(table, child, net, port); spin_lock(&head->lock); spin_lock(&head2->lock); tb = inet_csk(sk)->icsk_bind_hash; tb2 = inet_csk(sk)->icsk_bind2_hash; if (unlikely(!tb || !tb2)) { spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOENT; } if (tb->port != port) { l3mdev = inet_sk_bound_l3mdev(sk); /* NOTE: using tproxy and redirecting skbs to a proxy * on a different listener port breaks the assumption * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. 
*/ inet_bind_bucket_for_each(tb, &head->chain) { if (inet_bind_bucket_match(tb, net, port, l3mdev)) break; } if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, net, head, port, l3mdev); if (!tb) { spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOMEM; } created_inet_bind_bucket = true; } update_fastreuse = true; goto bhash2_find; } else if (!inet_bind2_bucket_addr_match(tb2, child)) { l3mdev = inet_sk_bound_l3mdev(sk); bhash2_find: tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child); if (!tb2) { tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, net, head2, tb, child); if (!tb2) goto error; } } if (update_fastreuse) inet_csk_update_fastreuse(tb, child); inet_bind_hash(child, tb, tb2, port); spin_unlock(&head2->lock); spin_unlock(&head->lock); return 0; error: if (created_inet_bind_bucket) inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOMEM; } EXPORT_SYMBOL_GPL(__inet_inherit_port); static struct inet_listen_hashbucket * inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk) { u32 hash; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); else #endif hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); return inet_lhash2_bucket(h, hash); } static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, const int dif, const int sdif) { int score = -1; if (net_eq(sock_net(sk), net) && sk->sk_num == hnum && !ipv6_only_sock(sk)) { if (sk->sk_rcv_saddr != daddr) return -1; if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; score = sk->sk_bound_dev_if ? 2 : 1; if (sk->sk_family == PF_INET) score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; } /** * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary. * @net: network namespace. * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP. * @skb: context for a potential SK_REUSEPORT program. * @doff: header offset. * @saddr: source address. * @sport: source port. * @daddr: destination address. * @hnum: destination port in host byte order. * @ehashfn: hash function used to generate the fallback hash. * * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to * the selected sock or an error. */ struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, inet_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn, net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } EXPORT_SYMBOL_GPL(inet_lookup_reuseport); /* * Here are some nice properties to exploit here. The BSD API * does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. 
*/ /* called with rcu_read_lock() : No refcount taken on the socket */ static struct sock *inet_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { struct sock *sk, *result = NULL; struct hlist_nulls_node *node; int score, hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, inet_ehashfn); if (result) return result; result = sk; hiscore = score; } } return result; } struct sock *inet_lookup_run_sk_lookup(struct net *net, int protocol, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, u16 hnum, const int dif, inet_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool no_reuseport; no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport, daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, ehashfn); if (reuse_sk) sk = reuse_sk; return sk; } struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && hashinfo == net->ipv4.tcp_death_row.hashinfo) { result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet_ehashfn); if (result) goto done; } hash2 = ipv4_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); if (result) goto done; /* Lookup lhash2 with INADDR_ANY */ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* All sockets share common refcount, but have different destructors */ void sock_gen_put(struct sock *sk) { if (!refcount_dec_and_test(&sk->sk_refcnt)) return; if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_free(inet_twsk(sk)); else if (sk->sk_state == TCP_NEW_SYN_RECV) reqsk_free(inet_reqsk(sk)); else sk_free(sk); } EXPORT_SYMBOL_GPL(sock_gen_put); void sock_edemux(struct sk_buff *skb) { sock_gen_put(skb->sk); } EXPORT_SYMBOL(sock_edemux); struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif, const int sdif) { INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) { if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!inet_match(net, sk, acookie, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } goto found; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != slot) goto begin; out: sk = NULL; found: return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_established); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, __u16 lport, struct inet_timewait_sock **twp) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; struct net *net = sock_net(sk); int sdif = l3mdev_master_ifindex_by_index(net, dif); INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (sk->sk_protocol == IPPROTO_TCP && tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; } } /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } static u64 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, inet->inet_daddr, inet->inet_dport); } /* Searches for an exsiting socket in the ehash bucket list. * Returns true if found, false otherwise. 
*/ static bool inet_ehash_lookup_by_sk(struct sock *sk, struct hlist_nulls_head *list) { const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); const int sdif = sk->sk_bound_dev_if; const int dif = sk->sk_bound_dev_if; const struct hlist_nulls_node *node; struct net *net = sock_net(sk); struct sock *esk; INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); sk_nulls_for_each_rcu(esk, node, list) { if (esk->sk_hash != sk->sk_hash) continue; if (sk->sk_family == AF_INET) { if (unlikely(inet_match(net, esk, acookie, ports, dif, sdif))) { return true; } } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { if (unlikely(inet6_match(net, esk, &sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr, ports, dif, sdif))) { return true; } } #endif } return false; } /* Insert a socket into ehash, and eventually remove another one * (The another one can be a SYN_RECV or TIMEWAIT) * If an existing socket already exists, socket sk is not inserted, * and sets found_dup_sk parameter to true. */ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_ehash_bucket *head; struct hlist_nulls_head *list; spinlock_t *lock; bool ret = true; WARN_ON_ONCE(!sk_unhashed(sk)); sk->sk_hash = sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); } else if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; } if (ret) __sk_nulls_add_node_rcu(sk, list); spin_unlock(lock); return ret; } bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { this_cpu_inc(*sk->sk_prot->orphan_count); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); } return ok; } EXPORT_SYMBOL_GPL(inet_ehash_nolisten); static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && inet_rcv_saddr_equal(sk, sk2, false)) return reuseport_add_sock(sk, sk2, inet_rcv_saddr_any(sk)); } return reuseport_alloc(sk, inet_rcv_saddr_any(sk)); } int __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_listen_hashbucket *ilb2; int err = 0; if (sk->sk_state != TCP_LISTEN) { local_bh_disable(); inet_ehash_nolisten(sk, osk, NULL); local_bh_enable(); return 0; } WARN_ON(!sk_unhashed(sk)); ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); spin_lock(&ilb2->lock); if (sk->sk_reuseport) { err = inet_reuseport_add_sock(sk, ilb2); if (err) goto unlock; } sock_set_flag(sk, SOCK_RCU_FREE); if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head); else __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: 
spin_unlock(&ilb2->lock); return err; } EXPORT_SYMBOL(__inet_hash); int inet_hash(struct sock *sk) { int err = 0; if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); return err; } EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); if (sk_unhashed(sk)) return; if (sk->sk_state == TCP_LISTEN) { struct inet_listen_hashbucket *ilb2; ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); /* Don't disable bottom halves while acquiring the lock to * avoid circular locking dependency on PREEMPT_RT. */ spin_lock(&ilb2->lock); if (sk_unhashed(sk)) { spin_unlock(&ilb2->lock); return; } if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_stop_listen_sock(sk); __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&ilb2->lock); } else { spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); if (sk_unhashed(sk)) { spin_unlock_bh(lock); return; } __sk_nulls_del_node_init_rcu(sk); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); } } EXPORT_SYMBOL_GPL(inet_unhash); static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { if (!net_eq(ib2_net(tb), net) || tb->port != port || tb->l3mdev != l3mdev) return false; return inet_bind2_bucket_addr_match(tb, sk); } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { if (!net_eq(ib2_net(tb), net) || tb->port != port || tb->l3mdev != l3mdev) return false; #if IS_ENABLED(CONFIG_IPV6) if (tb->addr_type == IPV6_ADDR_ANY) return true; if (tb->addr_type != IPV6_ADDR_MAPPED) return false; if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) return false; #endif return tb->rcv_saddr == 0; } /* The socket's bhash2 hashbucket spinlock must be held when this is called */ struct inet_bind2_bucket * inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk) { struct inet_bind2_bucket *bhash2 = NULL; inet_bind_bucket_for_each(bhash2, &head->chain) if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk)) break; return bhash2; } struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); u32 hash; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) hash = ipv6_portaddr_hash(net, &in6addr_any, port); else #endif hash = ipv4_portaddr_hash(net, 0, port); return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } static void inet_update_saddr(struct sock *sk, void *saddr, int family) { if (family == AF_INET) { inet_sk(sk)->inet_saddr = *(__be32 *)saddr; sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); } #if IS_ENABLED(CONFIG_IPV6) else { sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; } #endif } static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); int bhash; if (!inet_csk(sk)->icsk_bind2_hash) { /* Not bind()ed before. 
*/ if (reset) inet_reset_saddr(sk); else inet_update_saddr(sk, saddr, family); return 0; } /* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); if (!new_tb2) { if (reset) { /* The (INADDR_ANY, port) bucket might have already * been freed, then we cannot fixup icsk_bind2_hash, * so we give up and unlink sk from bhash/bhash2 not * to leave inconsistency in bhash2. */ inet_put_port(sk); inet_reset_saddr(sk); } return -ENOMEM; } bhash = inet_bhashfn(net, port, hinfo->bhash_size); head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); /* If we change saddr locklessly, another thread * iterating over bhash might see corrupted address. */ spin_lock_bh(&head->lock); spin_lock(&head2->lock); __sk_del_bind_node(sk); inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); spin_unlock(&head2->lock); if (reset) inet_reset_saddr(sk); else inet_update_saddr(sk, saddr, family); head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; inet_bind2_bucket_init(tb2, net, head2, inet_csk(sk)->icsk_bind_hash, sk); } inet_csk(sk)->icsk_bind2_hash = tb2; sk_add_bind_node(sk, &tb2->owners); spin_unlock(&head2->lock); spin_unlock_bh(&head->lock); if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); return 0; } int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) { return __inet_bhash2_update_saddr(sk, saddr, family, false); } EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); void inet_bhash2_reset_saddr(struct sock *sk) { if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) __inet_bhash2_update_saddr(sk, NULL, 0, true); } EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though * attacks were since demonstrated, thus we use 65536 by default instead * to really give more isolation and privacy, at the expense of 256kB * of kernel memory. */ #define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **)) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_bind_hashbucket *head, *head2; struct inet_timewait_sock *tw = NULL; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; bool tb_created = false; u32 remaining, offset; int ret, i, low, high; bool local_ports; int step, l3mdev; u32 index; if (port) { local_bh_disable(); ret = check_established(death_row, sk, port, NULL); local_bh_enable(); return ret; } l3mdev = inet_sk_bound_l3mdev(sk); local_ports = inet_sk_get_local_port_range(sk, &low, &high); step = local_ports ? 
1 : 2; high++; /* [32768, 60999] -> [32768, 61000[ */ remaining = high - low; if (!local_ports && remaining > 1) remaining &= ~1U; get_random_sleepable_once(table_perturb, INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); offset %= remaining; /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ if (!local_ports) offset &= ~1U; other_parity_scan: port = low + offset; for (i = 0; i < remaining; i += step, port += step) { if (unlikely(port >= high)) port -= remaining; if (inet_is_local_reserved_port(net, port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock_bh(&head->lock); /* Does not bother with rcv_saddr checks, because * the established check is already unique enough. */ inet_bind_bucket_for_each(tb, &head->chain) { if (inet_bind_bucket_match(tb, net, port, l3mdev)) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, port, &tw)) goto ok; goto next_port; } } tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, head, port, l3mdev); if (!tb) { spin_unlock_bh(&head->lock); return -ENOMEM; } tb_created = true; tb->fastreuse = -1; tb->fastreuseport = -1; goto ok; next_port: spin_unlock_bh(&head->lock); cond_resched(); } if (!local_ports) { offset++; if ((offset & 1) && remaining > 1) goto other_parity_scan; } return -EADDRNOTAVAIL; ok: /* Find the corresponding tb2 bucket since we need to * add the socket to the bhash2 table as well */ head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, head2, tb, sk); if (!tb2) goto error; } /* Here we want to add a little bit of randomness to the next source * port that will be chosen. We use a max() with a random here so that * on low contention the randomness is maximal and on high contention * it may be inexistent. */ i = max_t(int, i, get_random_u32_below(8) * step); WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head2->lock); spin_unlock(&head->lock); if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); return 0; error: if (sk_hashed(sk)) { spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(lock); __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); sk->sk_hash = 0; inet_sk(sk)->inet_sport = 0; inet_sk(sk)->inet_num = 0; if (tw) inet_twsk_bind_unhash(tw, hinfo); } spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); return -ENOMEM; } /* * Bind a port for a connect operation and hash it. 
*/ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); return __inet_hash_connect(death_row, sk, port_offset, __inet_check_established); } EXPORT_SYMBOL_GPL(inet_hash_connect); static void init_hashinfo_lhash2(struct inet_hashinfo *h) { int i; for (i = 0; i <= h->lhash2_mask; i++) { spin_lock_init(&h->lhash2[i].lock); INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, i + LISTENING_NULLS_BASE); } } void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, scale, 0, NULL, &h->lhash2_mask, low_limit, high_limit); init_hashinfo_lhash2(h); /* this one is used for source ports of outgoing connections */ table_perturb = alloc_large_system_hash("Table-perturb", sizeof(*table_perturb), INET_TABLE_PERTURB_SIZE, 0, 0, NULL, NULL, INET_TABLE_PERTURB_SIZE, INET_TABLE_PERTURB_SIZE); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) { h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL); if (!h->lhash2) return -ENOMEM; h->lhash2_mask = INET_LHTABLE_SIZE - 1; /* INET_LHTABLE_SIZE must be a power of 2 */ BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); init_hashinfo_lhash2(h); return 0; } EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod); int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; if (locksz != 0) { /* allocate 2 cache lines or at least one spinlock per cpu */ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U); nblocks = roundup_pow_of_two(nblocks * num_possible_cpus()); /* no more locks than number of hash buckets */ nblocks = min(nblocks, hashinfo->ehash_mask + 1); hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL); if (!hashinfo->ehash_locks) return -ENOMEM; for (i = 0; i < nblocks; i++) spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = nblocks - 1; return 0; } EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, unsigned int ehash_entries) { struct inet_hashinfo *new_hashinfo; int i; new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL); if (!new_hashinfo) goto err; new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket), GFP_KERNEL_ACCOUNT); if (!new_hashinfo->ehash) goto free_hashinfo; new_hashinfo->ehash_mask = ehash_entries - 1; if (inet_ehash_locks_alloc(new_hashinfo)) goto free_ehash; for (i = 0; i < ehash_entries; i++) INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i); new_hashinfo->pernet = true; return new_hashinfo; free_ehash: vfree(new_hashinfo->ehash); free_hashinfo: kfree(new_hashinfo); err: return NULL; } EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc); void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) { if (!hashinfo->pernet) return; inet_ehash_locks_free(hashinfo); vfree(hashinfo->ehash); kfree(hashinfo); } EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free);
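The RFC 6056 comment above (see __inet_hash_connect()) is easier to follow with the port-selection idea pulled out of the locking and bind-bucket management. Below is a standalone sketch under simplifying assumptions: a single is_port_free() predicate stands in for the bind-bucket and established-hash checks, and the table size, helper names and exact perturbation increment are illustrative rather than the kernel's.

/* Standalone sketch of the double-hash ephemeral port selection used by
 * __inet_hash_connect() above; is_port_free() and pick_ephemeral_port()
 * are hypothetical stand-ins, not kernel symbols. */
#include <stdint.h>
#include <stdbool.h>

#define PERTURB_SIZE (1 << 16)
static uint32_t perturb[PERTURB_SIZE];	/* one counter per destination hash */

extern bool is_port_free(uint32_t port);	/* bind/established checks */

static int pick_ephemeral_port(uint64_t dest_hash, uint32_t low, uint32_t high)
{
	uint32_t remaining = (high - low + 1) & ~1U;	/* even number of ports */
	uint32_t index = dest_hash & (PERTURB_SIZE - 1);
	uint32_t offset = (perturb[index] + (uint32_t)(dest_hash >> 32)) % remaining;

	offset &= ~1U;		/* scan one parity first, then the other */

	for (int pass = 0; pass < 2; pass++, offset++) {
		for (uint32_t i = 0; i < remaining; i += 2) {
			uint32_t port = low + (offset + i) % remaining;

			if (!is_port_free(port))
				continue;
			/* Bump the per-destination counter so the next connect
			 * to the same peer starts further along the range. */
			perturb[index] += i + 2;
			return (int)port;
		}
	}
	return -1;	/* the kernel returns -EADDRNOTAVAIL here */
}

In the real function the increment written back to table_perturb[] also includes a small random component (get_random_u32_below(8) * step), so the stride between successive source ports to the same destination is not fully predictable.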
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

#include "vmci_handle_array.h"
#include "vmci_queue_pair.h"
#include "vmci_datagram.h"
#include "vmci_resource.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"
#include "vmci_route.h"

/*
 * In the following, we will distinguish between two kinds of VMX processes -
 * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
 * VMCI page files in the VMX and supporting VM to VM communication and the
 * newer ones that use the guest memory directly. We will in the following
 * refer to the older VMX versions as old-style VMX'en, and the newer ones as
 * new-style VMX'en.
 *
 * The state transition diagram is as follows (the VMCIQPB_ prefix has been
 * removed for readability) - see below for more details on the transitions:
 *
 *            --------------  NEW  -------------
 *            |                                |
 *           \_/                              \_/
 *     CREATED_NO_MEM <-----------------> CREATED_MEM
 *            |    |                           |
 *            |    o-----------------------o   |
 *            |                             |  |
 *           \_/                           \_/ \_/
 *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
 *            |    |                           |
 *            |    o----------------------o    |
 *            |                            |   |
 *           \_/                          \_/ \_/
 *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
 *            |                                |
 *            |                                |
 *            -------------> gone <-------------
 *
 * In more detail. When a VMCI queue pair is first created, it will be in the
 * VMCIQPB_NEW state. It will then move into one of the following states:
 *
 * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
 *
 *     - the create was performed by a host endpoint, in which case there is
 *       no backing memory yet.
 *
 *     - the create was initiated by an old-style VMX, that uses
 *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
 *       a later point in time. This state can be distinguished from the one
 *       above by the context ID of the creator.
A host side is not allowed to * attach until the page store has been set. * * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair * is created by a VMX using the queue pair device backend that * sets the UVAs of the queue pair immediately and stores the * information for later attachers. At this point, it is ready for * the host side to attach to it. * * Once the queue pair is in one of the created states (with the exception of * the case mentioned for older VMX'en above), it is possible to attach to the * queue pair. Again we have two new states possible: * * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following * paths: * * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue * pair, and attaches to a queue pair previously created by the host side. * * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair * already created by a guest. * * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls * vmci_qp_broker_set_page_store (see below). * * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will * bring the queue pair into this state. Once vmci_qp_broker_set_page_store * is called to register the user memory, the VMCIQPB_ATTACH_MEM state * will be entered. * * From the attached queue pair, the queue pair can enter the shutdown states * when either side of the queue pair detaches. If the guest side detaches * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where * the content of the queue pair will no longer be available. If the host * side detaches first, the queue pair will either enter the * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped * (e.g., the host detaches while a guest is stunned). * * New-style VMX'en will also unmap guest memory, if the guest is * quiesced, e.g., during a snapshot operation. In that case, the guest * memory will no longer be available, and the queue pair will transition from * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more, * in which case the queue pair will transition from the *_NO_MEM state at that * point back to the *_MEM state. Note that the *_NO_MEM state may have changed, * since the peer may have either attached or detached in the meantime. The * values are laid out such that ++ on a state will move from a *_NO_MEM to a * *_MEM state, and vice versa. */ /* The Kernel specific component of the struct vmci_queue structure. */ struct vmci_queue_kern_if { struct mutex __mutex; /* Protects the queue. */ struct mutex *mutex; /* Shared by producer and consumer queues. */ size_t num_pages; /* Number of pages incl. header. */ bool host; /* Host or guest? */ union { struct { dma_addr_t *pas; void **vas; } g; /* Used by the guest. */ struct { struct page **page; struct page **header_page; } h; /* Used by the host. */ } u; }; /* * This structure is opaque to the clients. 
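*/

/*
 * Minimal standalone sketch (not part of this driver) of the state layout
 * described above: a hypothetical enum mirrors how each NO_MEM state sits
 * directly before its MEM counterpart, so stepping a NO_MEM value up by one
 * yields the matching MEM value when guest memory is (re)mapped, and stepping
 * down undoes it. All qpb_sketch_* names are assumptions made for
 * illustration only.
 */
#include <assert.h>

enum qpb_sketch_state {
        QPB_SKETCH_NEW,
        QPB_SKETCH_CREATED_NO_MEM,
        QPB_SKETCH_CREATED_MEM,
        QPB_SKETCH_ATTACHED_NO_MEM,
        QPB_SKETCH_ATTACHED_MEM,
        QPB_SKETCH_SHUTDOWN_NO_MEM,
        QPB_SKETCH_SHUTDOWN_MEM,
        QPB_SKETCH_GONE,
};

/* Guest memory becomes available again: NO_MEM -> matching MEM state. */
static enum qpb_sketch_state qpb_sketch_map_mem(enum qpb_sketch_state s)
{
        assert(s == QPB_SKETCH_CREATED_NO_MEM ||
               s == QPB_SKETCH_ATTACHED_NO_MEM ||
               s == QPB_SKETCH_SHUTDOWN_NO_MEM);
        return s + 1;
}

/* Guest memory is unmapped (e.g. the guest is quiesced): MEM -> NO_MEM. */
static enum qpb_sketch_state qpb_sketch_unmap_mem(enum qpb_sketch_state s)
{
        assert(s == QPB_SKETCH_CREATED_MEM ||
               s == QPB_SKETCH_ATTACHED_MEM ||
               s == QPB_SKETCH_SHUTDOWN_MEM);
        return s - 1;
}

/*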
*/ struct vmci_qp { struct vmci_handle handle; struct vmci_queue *produce_q; struct vmci_queue *consume_q; u64 produce_q_size; u64 consume_q_size; u32 peer; u32 flags; u32 priv_flags; bool guest_endpoint; unsigned int blocked; unsigned int generation; wait_queue_head_t event; }; enum qp_broker_state { VMCIQPB_NEW, VMCIQPB_CREATED_NO_MEM, VMCIQPB_CREATED_MEM, VMCIQPB_ATTACHED_NO_MEM, VMCIQPB_ATTACHED_MEM, VMCIQPB_SHUTDOWN_NO_MEM, VMCIQPB_SHUTDOWN_MEM, VMCIQPB_GONE }; #define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \ _qpb->state == VMCIQPB_ATTACHED_MEM || \ _qpb->state == VMCIQPB_SHUTDOWN_MEM) /* * In the queue pair broker, we always use the guest point of view for * the produce and consume queue values and references, e.g., the * produce queue size stored is the guests produce queue size. The * host endpoint will need to swap these around. The only exception is * the local queue pairs on the host, in which case the host endpoint * that creates the queue pair will have the right orientation, and * the attaching host endpoint will need to swap. */ struct qp_entry { struct list_head list_item; struct vmci_handle handle; u32 peer; u32 flags; u64 produce_size; u64 consume_size; u32 ref_count; }; struct qp_broker_entry { struct vmci_resource resource; struct qp_entry qp; u32 create_id; u32 attach_id; enum qp_broker_state state; bool require_trusted_attach; bool created_by_trusted; bool vmci_page_files; /* Created by VMX using VMCI page files */ struct vmci_queue *produce_q; struct vmci_queue *consume_q; struct vmci_queue_header saved_produce_q; struct vmci_queue_header saved_consume_q; vmci_event_release_cb wakeup_cb; void *client_data; void *local_mem; /* Kernel memory for local queue pair */ }; struct qp_guest_endpoint { struct vmci_resource resource; struct qp_entry qp; u64 num_ppns; void *produce_q; void *consume_q; struct ppn_set ppn_set; }; struct qp_list { struct list_head head; struct mutex mutex; /* Protect queue list. */ }; static struct qp_list qp_broker_list = { .head = LIST_HEAD_INIT(qp_broker_list.head), .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex), }; static struct qp_list qp_guest_endpoints = { .head = LIST_HEAD_INIT(qp_guest_endpoints.head), .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex), }; #define INVALID_VMCI_GUEST_MEM_ID 0 #define QPE_NUM_PAGES(_QPE) ((u32) \ (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \ DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2)) #define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \ ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \ (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY) /* * Frees kernel VA space for a given queue and its queue header, and * frees physical data pages. */ static void qp_free_queue(void *q, u64 size) { struct vmci_queue *queue = q; if (queue) { u64 i; /* Given size does not include header, so add in a page here. */ for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) { dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE, queue->kernel_if->u.g.vas[i], queue->kernel_if->u.g.pas[i]); } vfree(queue); } } /* * Allocates kernel queue pages of specified size with IOMMU mappings, * plus space for the queue structure/kernel interface and the queue * header. 
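*/

/*
 * Standalone sketch (not from the driver) of the unsigned-overflow check that
 * QP_SIZES_ARE_VALID above relies on: for unsigned a and b, a + b wraps around
 * exactly when the sum ends up smaller than the larger operand, so requiring
 * "a + b >= max(a, b)" rejects wrapped totals before the memory-limit
 * comparison. The limit below is a made-up stand-in for
 * VMCI_MAX_GUEST_QP_MEMORY.
 */
#include <stdbool.h>
#include <stdint.h>

#define SIZES_SKETCH_MAX_QP_MEMORY (128ULL * 1024 * 1024) /* assumed limit */

static bool sizes_sketch_are_valid(uint64_t produce_qsize,
                                   uint64_t consume_qsize)
{
        uint64_t sum = produce_qsize + consume_qsize;   /* may wrap */
        uint64_t max = produce_qsize > consume_qsize ?
                       produce_qsize : consume_qsize;

        /* A wrapped sum is necessarily smaller than either operand. */
        return sum >= max && sum <= SIZES_SKETCH_MAX_QP_MEMORY;
}

/*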
*/ static void *qp_alloc_queue(u64 size, u32 flags) { u64 i; struct vmci_queue *queue; size_t pas_size; size_t vas_size; size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); u64 num_pages; if (size > SIZE_MAX - PAGE_SIZE) return NULL; num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / (sizeof(*queue->kernel_if->u.g.pas) + sizeof(*queue->kernel_if->u.g.vas))) return NULL; pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas); vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas); queue_size += pas_size + vas_size; queue = vmalloc(queue_size); if (!queue) return NULL; queue->q_header = NULL; queue->saved_header = NULL; queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); queue->kernel_if->mutex = NULL; queue->kernel_if->num_pages = num_pages; queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1); queue->kernel_if->u.g.vas = (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size); queue->kernel_if->host = false; for (i = 0; i < num_pages; i++) { queue->kernel_if->u.g.vas[i] = dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE, &queue->kernel_if->u.g.pas[i], GFP_KERNEL); if (!queue->kernel_if->u.g.vas[i]) { /* Size excl. the header. */ qp_free_queue(queue, i * PAGE_SIZE); return NULL; } } /* Queue header is the first page. */ queue->q_header = queue->kernel_if->u.g.vas[0]; return queue; } /* * Copies from a given buffer or iovector to a VMCI Queue. Uses * kmap_local_page() to dynamically map required portions of the queue * by traversing the offset -> page translation structure for the queue. * Assumes that offset + size does not wrap around in the queue. */ static int qp_memcpy_to_queue_iter(struct vmci_queue *queue, u64 queue_offset, struct iov_iter *from, size_t size) { struct vmci_queue_kern_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; while (bytes_copied < size) { const u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; const size_t page_offset = (queue_offset + bytes_copied) & (PAGE_SIZE - 1); void *va; size_t to_copy; if (kernel_if->host) va = kmap_local_page(kernel_if->u.h.page[page_index]); else va = kernel_if->u.g.vas[page_index + 1]; /* Skip header. */ if (size - bytes_copied > PAGE_SIZE - page_offset) /* Enough payload to fill up from this page. */ to_copy = PAGE_SIZE - page_offset; else to_copy = size - bytes_copied; if (!copy_from_iter_full((u8 *)va + page_offset, to_copy, from)) { if (kernel_if->host) kunmap_local(va); return VMCI_ERROR_INVALID_ARGS; } bytes_copied += to_copy; if (kernel_if->host) kunmap_local(va); } return VMCI_SUCCESS; } /* * Copies to a given buffer or iovector from a VMCI Queue. Uses * kmap_local_page() to dynamically map required portions of the queue * by traversing the offset -> page translation structure for the queue. * Assumes that offset + size does not wrap around in the queue. */ static int qp_memcpy_from_queue_iter(struct iov_iter *to, const struct vmci_queue *queue, u64 queue_offset, size_t size) { struct vmci_queue_kern_if *kernel_if = queue->kernel_if; size_t bytes_copied = 0; while (bytes_copied < size) { const u64 page_index = (queue_offset + bytes_copied) / PAGE_SIZE; const size_t page_offset = (queue_offset + bytes_copied) & (PAGE_SIZE - 1); void *va; size_t to_copy; int err; if (kernel_if->host) va = kmap_local_page(kernel_if->u.h.page[page_index]); else va = kernel_if->u.g.vas[page_index + 1]; /* Skip header. */ if (size - bytes_copied > PAGE_SIZE - page_offset) /* Enough payload to fill up this page. 
*/ to_copy = PAGE_SIZE - page_offset; else to_copy = size - bytes_copied; err = copy_to_iter((u8 *)va + page_offset, to_copy, to); if (err != to_copy) { if (kernel_if->host) kunmap_local(va); return VMCI_ERROR_INVALID_ARGS; } bytes_copied += to_copy; if (kernel_if->host) kunmap_local(va); } return VMCI_SUCCESS; } /* * Allocates two list of PPNs --- one for the pages in the produce queue, * and the other for the pages in the consume queue. Intializes the list * of PPNs with the page frame numbers of the KVA for the two queues (and * the queue headers). */ static int qp_alloc_ppn_set(void *prod_q, u64 num_produce_pages, void *cons_q, u64 num_consume_pages, struct ppn_set *ppn_set) { u64 *produce_ppns; u64 *consume_ppns; struct vmci_queue *produce_q = prod_q; struct vmci_queue *consume_q = cons_q; u64 i; if (!produce_q || !num_produce_pages || !consume_q || !num_consume_pages || !ppn_set) return VMCI_ERROR_INVALID_ARGS; if (ppn_set->initialized) return VMCI_ERROR_ALREADY_EXISTS; produce_ppns = kmalloc_array(num_produce_pages, sizeof(*produce_ppns), GFP_KERNEL); if (!produce_ppns) return VMCI_ERROR_NO_MEM; consume_ppns = kmalloc_array(num_consume_pages, sizeof(*consume_ppns), GFP_KERNEL); if (!consume_ppns) { kfree(produce_ppns); return VMCI_ERROR_NO_MEM; } for (i = 0; i < num_produce_pages; i++) produce_ppns[i] = produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; for (i = 0; i < num_consume_pages; i++) consume_ppns[i] = consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; ppn_set->num_produce_pages = num_produce_pages; ppn_set->num_consume_pages = num_consume_pages; ppn_set->produce_ppns = produce_ppns; ppn_set->consume_ppns = consume_ppns; ppn_set->initialized = true; return VMCI_SUCCESS; } /* * Frees the two list of PPNs for a queue pair. */ static void qp_free_ppn_set(struct ppn_set *ppn_set) { if (ppn_set->initialized) { /* Do not call these functions on NULL inputs. */ kfree(ppn_set->produce_ppns); kfree(ppn_set->consume_ppns); } memset(ppn_set, 0, sizeof(*ppn_set)); } /* * Populates the list of PPNs in the hypercall structure with the PPNS * of the produce queue and the consume queue. */ static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set) { if (vmci_use_ppn64()) { memcpy(call_buf, ppn_set->produce_ppns, ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); memcpy(call_buf + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns), ppn_set->consume_ppns, ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); } else { int i; u32 *ppns = (u32 *) call_buf; for (i = 0; i < ppn_set->num_produce_pages; i++) ppns[i] = (u32) ppn_set->produce_ppns[i]; ppns = &ppns[ppn_set->num_produce_pages]; for (i = 0; i < ppn_set->num_consume_pages; i++) ppns[i] = (u32) ppn_set->consume_ppns[i]; } return VMCI_SUCCESS; } /* * Allocates kernel VA space of specified size plus space for the queue * and kernel interface. This is different from the guest queue allocator, * because we do not allocate our own queue header/data pages here but * share those of the guest. 
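*/

/*
 * Illustrative sketch (not part of the driver) of the PPN packing done by
 * qp_populate_ppn_set above: each physical address is shifted down to a page
 * frame number and written into the hypercall buffer either as a 64-bit value
 * or truncated to 32 bits for hosts that do not use 64-bit PPNs. The page
 * shift and all ppn_sketch_* names are assumptions for illustration.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define PPN_SKETCH_PAGE_SHIFT 12        /* assumes 4 KiB pages */

static void ppn_sketch_pack(uint8_t *buf, const uint64_t *pas, size_t n,
                            bool use_ppn64)
{
        size_t i;

        for (i = 0; i < n; i++) {
                uint64_t ppn = pas[i] >> PPN_SKETCH_PAGE_SHIFT;

                if (use_ppn64) {
                        memcpy(buf + i * sizeof(ppn), &ppn, sizeof(ppn));
                } else {
                        uint32_t ppn32 = (uint32_t)ppn;

                        memcpy(buf + i * sizeof(ppn32), &ppn32, sizeof(ppn32));
                }
        }
}

/*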
*/ static struct vmci_queue *qp_host_alloc_queue(u64 size) { struct vmci_queue *queue; size_t queue_page_size; u64 num_pages; const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE)) return NULL; num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; if (num_pages > (SIZE_MAX - queue_size) / sizeof(*queue->kernel_if->u.h.page)) return NULL; queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page); if (queue_size + queue_page_size > KMALLOC_MAX_SIZE) return NULL; queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); if (queue) { queue->q_header = NULL; queue->saved_header = NULL; queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); queue->kernel_if->host = true; queue->kernel_if->mutex = NULL; queue->kernel_if->num_pages = num_pages; queue->kernel_if->u.h.header_page = (struct page **)((u8 *)queue + queue_size); queue->kernel_if->u.h.page = &queue->kernel_if->u.h.header_page[1]; } return queue; } /* * Frees kernel memory for a given queue (header plus translation * structure). */ static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size) { kfree(queue); } /* * Initialize the mutex for the pair of queues. This mutex is used to * protect the q_header and the buffer from changing out from under any * users of either queue. Of course, it's only any good if the mutexes * are actually acquired. Queue structure must lie on non-paged memory * or we cannot guarantee access to the mutex. */ static void qp_init_queue_mutex(struct vmci_queue *produce_q, struct vmci_queue *consume_q) { /* * Only the host queue has shared state - the guest queues do not * need to synchronize access using a queue mutex. */ if (produce_q->kernel_if->host) { produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; mutex_init(produce_q->kernel_if->mutex); } } /* * Cleans up the mutex for the pair of queues. */ static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q, struct vmci_queue *consume_q) { if (produce_q->kernel_if->host) { produce_q->kernel_if->mutex = NULL; consume_q->kernel_if->mutex = NULL; } } /* * Acquire the mutex for the queue. Note that the produce_q and * the consume_q share a mutex. So, only one of the two need to * be passed in to this routine. Either will work just fine. */ static void qp_acquire_queue_mutex(struct vmci_queue *queue) { if (queue->kernel_if->host) mutex_lock(queue->kernel_if->mutex); } /* * Release the mutex for the queue. Note that the produce_q and * the consume_q share a mutex. So, only one of the two need to * be passed in to this routine. Either will work just fine. */ static void qp_release_queue_mutex(struct vmci_queue *queue) { if (queue->kernel_if->host) mutex_unlock(queue->kernel_if->mutex); } /* * Helper function to release pages in the PageStoreAttachInfo * previously obtained using get_user_pages. */ static void qp_release_pages(struct page **pages, u64 num_pages, bool dirty) { int i; for (i = 0; i < num_pages; i++) { if (dirty) set_page_dirty_lock(pages[i]); put_page(pages[i]); pages[i] = NULL; } } /* * Lock the user pages referenced by the {produce,consume}Buffer * struct into memory and populate the {produce,consume}Pages * arrays in the attach structure with them. 
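*/

/*
 * Userspace analogue (pthreads instead of the kernel mutex; all names
 * hypothetical) of the sharing arranged by qp_init_queue_mutex above: the
 * lock is embedded in the produce queue and both queues point at it, so
 * locking through either queue serializes access to the whole pair.
 */
#include <pthread.h>

struct pairlock_sketch_queue {
        pthread_mutex_t __mutex;        /* storage, lives in the produce queue */
        pthread_mutex_t *mutex;         /* shared by produce and consume */
};

static void pairlock_sketch_init(struct pairlock_sketch_queue *produce_q,
                                 struct pairlock_sketch_queue *consume_q)
{
        pthread_mutex_init(&produce_q->__mutex, NULL);
        produce_q->mutex = &produce_q->__mutex;
        consume_q->mutex = &produce_q->__mutex;
}

static void pairlock_sketch_acquire(struct pairlock_sketch_queue *q)
{
        pthread_mutex_lock(q->mutex);   /* either queue of the pair works */
}

static void pairlock_sketch_release(struct pairlock_sketch_queue *q)
{
        pthread_mutex_unlock(q->mutex);
}

/*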
*/ static int qp_host_get_user_memory(u64 produce_uva, u64 consume_uva, struct vmci_queue *produce_q, struct vmci_queue *consume_q) { int retval; int err = VMCI_SUCCESS; retval = get_user_pages_fast((uintptr_t) produce_uva, produce_q->kernel_if->num_pages, FOLL_WRITE, produce_q->kernel_if->u.h.header_page); if (retval < (int)produce_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(produce) failed (retval=%d)", retval); if (retval > 0) qp_release_pages(produce_q->kernel_if->u.h.header_page, retval, false); err = VMCI_ERROR_NO_MEM; goto out; } retval = get_user_pages_fast((uintptr_t) consume_uva, consume_q->kernel_if->num_pages, FOLL_WRITE, consume_q->kernel_if->u.h.header_page); if (retval < (int)consume_q->kernel_if->num_pages) { pr_debug("get_user_pages_fast(consume) failed (retval=%d)", retval); if (retval > 0) qp_release_pages(consume_q->kernel_if->u.h.header_page, retval, false); qp_release_pages(produce_q->kernel_if->u.h.header_page, produce_q->kernel_if->num_pages, false); err = VMCI_ERROR_NO_MEM; } out: return err; } /* * Registers the specification of the user pages used for backing a queue * pair. Enough information to map in pages is stored in the OS specific * part of the struct vmci_queue structure. */ static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store, struct vmci_queue *produce_q, struct vmci_queue *consume_q) { u64 produce_uva; u64 consume_uva; /* * The new style and the old style mapping only differs in * that we either get a single or two UVAs, so we split the * single UVA range at the appropriate spot. */ produce_uva = page_store->pages; consume_uva = page_store->pages + produce_q->kernel_if->num_pages * PAGE_SIZE; return qp_host_get_user_memory(produce_uva, consume_uva, produce_q, consume_q); } /* * Releases and removes the references to user pages stored in the attach * struct. Pages are released from the page cache and may become * swappable again. */ static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, struct vmci_queue *consume_q) { qp_release_pages(produce_q->kernel_if->u.h.header_page, produce_q->kernel_if->num_pages, true); memset(produce_q->kernel_if->u.h.header_page, 0, sizeof(*produce_q->kernel_if->u.h.header_page) * produce_q->kernel_if->num_pages); qp_release_pages(consume_q->kernel_if->u.h.header_page, consume_q->kernel_if->num_pages, true); memset(consume_q->kernel_if->u.h.header_page, 0, sizeof(*consume_q->kernel_if->u.h.header_page) * consume_q->kernel_if->num_pages); } /* * Once qp_host_register_user_memory has been performed on a * queue, the queue pair headers can be mapped into the * kernel. Once mapped, they must be unmapped with * qp_host_unmap_queues prior to calling * qp_host_unregister_user_memory. * Pages are pinned. 
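*/

/*
 * Small sketch of the address arithmetic in qp_host_register_user_memory
 * above: a single contiguous user VA range holds the produce queue (header
 * page plus data pages) followed by the consume queue, so the consume UVA is
 * found by skipping past the produce queue's pages. The page size and the
 * uva_sketch_* names are assumptions.
 */
#include <stdint.h>

#define UVA_SKETCH_PAGE_SIZE 4096ULL    /* assumed page size */

struct uva_sketch_split {
        uint64_t produce_uva;
        uint64_t consume_uva;
};

/* num_produce_pages counts the produce header page plus its data pages. */
static struct uva_sketch_split uva_sketch_split_range(uint64_t pages,
                                                      uint64_t num_produce_pages)
{
        struct uva_sketch_split s;

        s.produce_uva = pages;
        s.consume_uva = pages + num_produce_pages * UVA_SKETCH_PAGE_SIZE;
        return s;
}

/*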
*/ static int qp_host_map_queues(struct vmci_queue *produce_q, struct vmci_queue *consume_q) { int result; if (!produce_q->q_header || !consume_q->q_header) { struct page *headers[2]; if (produce_q->q_header != consume_q->q_header) return VMCI_ERROR_QUEUEPAIR_MISMATCH; if (produce_q->kernel_if->u.h.header_page == NULL || *produce_q->kernel_if->u.h.header_page == NULL) return VMCI_ERROR_UNAVAILABLE; headers[0] = *produce_q->kernel_if->u.h.header_page; headers[1] = *consume_q->kernel_if->u.h.header_page; produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); if (produce_q->q_header != NULL) { consume_q->q_header = (struct vmci_queue_header *)((u8 *) produce_q->q_header + PAGE_SIZE); result = VMCI_SUCCESS; } else { pr_warn("vmap failed\n"); result = VMCI_ERROR_NO_MEM; } } else { result = VMCI_SUCCESS; } return result; } /* * Unmaps previously mapped queue pair headers from the kernel. * Pages are unpinned. */ static int qp_host_unmap_queues(u32 gid, struct vmci_queue *produce_q, struct vmci_queue *consume_q) { if (produce_q->q_header) { if (produce_q->q_header < consume_q->q_header) vunmap(produce_q->q_header); else vunmap(consume_q->q_header); produce_q->q_header = NULL; consume_q->q_header = NULL; } return VMCI_SUCCESS; } /* * Finds the entry in the list corresponding to a given handle. Assumes * that the list is locked. */ static struct qp_entry *qp_list_find(struct qp_list *qp_list, struct vmci_handle handle) { struct qp_entry *entry; if (vmci_handle_is_invalid(handle)) return NULL; list_for_each_entry(entry, &qp_list->head, list_item) { if (vmci_handle_is_equal(entry->handle, handle)) return entry; } return NULL; } /* * Finds the entry in the list corresponding to a given handle. */ static struct qp_guest_endpoint * qp_guest_handle_to_entry(struct vmci_handle handle) { struct qp_guest_endpoint *entry; struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle); entry = qp ? container_of( qp, struct qp_guest_endpoint, qp) : NULL; return entry; } /* * Finds the entry in the list corresponding to a given handle. */ static struct qp_broker_entry * qp_broker_handle_to_entry(struct vmci_handle handle) { struct qp_broker_entry *entry; struct qp_entry *qp = qp_list_find(&qp_broker_list, handle); entry = qp ? container_of( qp, struct qp_broker_entry, qp) : NULL; return entry; } /* * Dispatches a queue pair event message directly into the local event * queue. */ static int qp_notify_peer_local(bool attach, struct vmci_handle handle) { u32 context_id = vmci_get_context_id(); struct vmci_event_qp ev; memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); ev.msg.event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; ev.payload.peer_id = context_id; ev.payload.handle = handle; return vmci_event_dispatch(&ev.msg.hdr); } /* * Allocates and initializes a qp_guest_endpoint structure. * Allocates a queue_pair rid (and handle) iff the given entry has * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX * are reserved handles. Assumes that the QP list mutex is held * by the caller. */ static struct qp_guest_endpoint * qp_guest_endpoint_create(struct vmci_handle handle, u32 peer, u32 flags, u64 produce_size, u64 consume_size, void *produce_q, void *consume_q) { int result; struct qp_guest_endpoint *entry; /* One page each for the queue headers. 
*/ const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2; if (vmci_handle_is_invalid(handle)) { u32 context_id = vmci_get_context_id(); handle = vmci_make_handle(context_id, VMCI_INVALID_ID); } entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { entry->qp.peer = peer; entry->qp.flags = flags; entry->qp.produce_size = produce_size; entry->qp.consume_size = consume_size; entry->qp.ref_count = 0; entry->num_ppns = num_ppns; entry->produce_q = produce_q; entry->consume_q = consume_q; INIT_LIST_HEAD(&entry->qp.list_item); /* Add resource obj */ result = vmci_resource_add(&entry->resource, VMCI_RESOURCE_TYPE_QPAIR_GUEST, handle); entry->qp.handle = vmci_resource_handle(&entry->resource); if ((result != VMCI_SUCCESS) || qp_list_find(&qp_guest_endpoints, entry->qp.handle)) { pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", handle.context, handle.resource, result); kfree(entry); entry = NULL; } } return entry; } /* * Frees a qp_guest_endpoint structure. */ static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) { qp_free_ppn_set(&entry->ppn_set); qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); qp_free_queue(entry->produce_q, entry->qp.produce_size); qp_free_queue(entry->consume_q, entry->qp.consume_size); /* Unlink from resource hash table and free callback */ vmci_resource_remove(&entry->resource); kfree(entry); } /* * Helper to make a queue_pairAlloc hypercall when the driver is * supporting a guest device. */ static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry) { struct vmci_qp_alloc_msg *alloc_msg; size_t msg_size; size_t ppn_size; int result; if (!entry || entry->num_ppns <= 2) return VMCI_ERROR_INVALID_ARGS; ppn_size = vmci_use_ppn64() ? sizeof(u64) : sizeof(u32); msg_size = sizeof(*alloc_msg) + (size_t) entry->num_ppns * ppn_size; alloc_msg = kmalloc(msg_size, GFP_KERNEL); if (!alloc_msg) return VMCI_ERROR_NO_MEM; alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_QUEUEPAIR_ALLOC); alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; alloc_msg->handle = entry->qp.handle; alloc_msg->peer = entry->qp.peer; alloc_msg->flags = entry->qp.flags; alloc_msg->produce_size = entry->qp.produce_size; alloc_msg->consume_size = entry->qp.consume_size; alloc_msg->num_ppns = entry->num_ppns; result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg), &entry->ppn_set); if (result == VMCI_SUCCESS) result = vmci_send_datagram(&alloc_msg->hdr); kfree(alloc_msg); return result; } /* * Helper to make a queue_pairDetach hypercall when the driver is * supporting a guest device. */ static int qp_detatch_hypercall(struct vmci_handle handle) { struct vmci_qp_detach_msg detach_msg; detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_QUEUEPAIR_DETACH); detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; detach_msg.hdr.payload_size = sizeof(handle); detach_msg.handle = handle; return vmci_send_datagram(&detach_msg.hdr); } /* * Adds the given entry to the list. Assumes that the list is locked. */ static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry) { if (entry) list_add(&entry->list_item, &qp_list->head); } /* * Removes the given entry from the list. Assumes that the list is locked. */ static void qp_list_remove_entry(struct qp_list *qp_list, struct qp_entry *entry) { if (entry) list_del(&entry->list_item); } /* * Helper for VMCI queue_pair detach interface. 
Frees the physical * pages for the queue pair. */ static int qp_detatch_guest_work(struct vmci_handle handle) { int result; struct qp_guest_endpoint *entry; u32 ref_count = ~0; /* To avoid compiler warning below */ mutex_lock(&qp_guest_endpoints.mutex); entry = qp_guest_handle_to_entry(handle); if (!entry) { mutex_unlock(&qp_guest_endpoints.mutex); return VMCI_ERROR_NOT_FOUND; } if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { result = VMCI_SUCCESS; if (entry->qp.ref_count > 1) { result = qp_notify_peer_local(false, handle); /* * We can fail to notify a local queuepair * because we can't allocate. We still want * to release the entry if that happens, so * don't bail out yet. */ } } else { result = qp_detatch_hypercall(handle); if (result < VMCI_SUCCESS) { /* * We failed to notify a non-local queuepair. * That other queuepair might still be * accessing the shared memory, so don't * release the entry yet. It will get cleaned * up by VMCIqueue_pair_Exit() if necessary * (assuming we are going away, otherwise why * did this fail?). */ mutex_unlock(&qp_guest_endpoints.mutex); return result; } } /* * If we get here then we either failed to notify a local queuepair, or * we succeeded in all cases. Release the entry if required. */ entry->qp.ref_count--; if (entry->qp.ref_count == 0) qp_list_remove_entry(&qp_guest_endpoints, &entry->qp); /* If we didn't remove the entry, this could change once we unlock. */ if (entry) ref_count = entry->qp.ref_count; mutex_unlock(&qp_guest_endpoints.mutex); if (ref_count == 0) qp_guest_endpoint_destroy(entry); return result; } /* * This functions handles the actual allocation of a VMCI queue * pair guest endpoint. Allocates physical pages for the queue * pair. It makes OS dependent calls through generic wrappers. */ static int qp_alloc_guest_work(struct vmci_handle *handle, struct vmci_queue **produce_q, u64 produce_size, struct vmci_queue **consume_q, u64 consume_size, u32 peer, u32 flags, u32 priv_flags) { const u64 num_produce_pages = DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1; const u64 num_consume_pages = DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1; void *my_produce_q = NULL; void *my_consume_q = NULL; int result; struct qp_guest_endpoint *queue_pair_entry = NULL; if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) return VMCI_ERROR_NO_ACCESS; mutex_lock(&qp_guest_endpoints.mutex); queue_pair_entry = qp_guest_handle_to_entry(*handle); if (queue_pair_entry) { if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { /* Local attach case. */ if (queue_pair_entry->qp.ref_count > 1) { pr_devel("Error attempting to attach more than once\n"); result = VMCI_ERROR_UNAVAILABLE; goto error_keep_entry; } if (queue_pair_entry->qp.produce_size != consume_size || queue_pair_entry->qp.consume_size != produce_size || queue_pair_entry->qp.flags != (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { pr_devel("Error mismatched queue pair in local attach\n"); result = VMCI_ERROR_QUEUEPAIR_MISMATCH; goto error_keep_entry; } /* * Do a local attach. We swap the consume and * produce queues for the attacher and deliver * an attach event. 
*/ result = qp_notify_peer_local(true, *handle); if (result < VMCI_SUCCESS) goto error_keep_entry; my_produce_q = queue_pair_entry->consume_q; my_consume_q = queue_pair_entry->produce_q; goto out; } result = VMCI_ERROR_ALREADY_EXISTS; goto error_keep_entry; } my_produce_q = qp_alloc_queue(produce_size, flags); if (!my_produce_q) { pr_warn("Error allocating pages for produce queue\n"); result = VMCI_ERROR_NO_MEM; goto error; } my_consume_q = qp_alloc_queue(consume_size, flags); if (!my_consume_q) { pr_warn("Error allocating pages for consume queue\n"); result = VMCI_ERROR_NO_MEM; goto error; } queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, produce_size, consume_size, my_produce_q, my_consume_q); if (!queue_pair_entry) { pr_warn("Error allocating memory in %s\n", __func__); result = VMCI_ERROR_NO_MEM; goto error; } result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q, num_consume_pages, &queue_pair_entry->ppn_set); if (result < VMCI_SUCCESS) { pr_warn("qp_alloc_ppn_set failed\n"); goto error; } /* * It's only necessary to notify the host if this queue pair will be * attached to from another context. */ if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { /* Local create case. */ u32 context_id = vmci_get_context_id(); /* * Enforce similar checks on local queue pairs as we * do for regular ones. The handle's context must * match the creator or attacher context id (here they * are both the current context id) and the * attach-only flag cannot exist during create. We * also ensure specified peer is this context or an * invalid one. */ if (queue_pair_entry->qp.handle.context != context_id || (queue_pair_entry->qp.peer != VMCI_INVALID_ID && queue_pair_entry->qp.peer != context_id)) { result = VMCI_ERROR_NO_ACCESS; goto error; } if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { result = VMCI_ERROR_NOT_FOUND; goto error; } } else { result = qp_alloc_hypercall(queue_pair_entry); if (result < VMCI_SUCCESS) { pr_devel("qp_alloc_hypercall result = %d\n", result); goto error; } } qp_init_queue_mutex((struct vmci_queue *)my_produce_q, (struct vmci_queue *)my_consume_q); qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); out: queue_pair_entry->qp.ref_count++; *handle = queue_pair_entry->qp.handle; *produce_q = (struct vmci_queue *)my_produce_q; *consume_q = (struct vmci_queue *)my_consume_q; /* * We should initialize the queue pair header pages on a local * queue pair create. For non-local queue pairs, the * hypervisor initializes the header pages in the create step. */ if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && queue_pair_entry->qp.ref_count == 1) { vmci_q_header_init((*produce_q)->q_header, *handle); vmci_q_header_init((*consume_q)->q_header, *handle); } mutex_unlock(&qp_guest_endpoints.mutex); return VMCI_SUCCESS; error: mutex_unlock(&qp_guest_endpoints.mutex); if (queue_pair_entry) { /* The queues will be freed inside the destroy routine. */ qp_guest_endpoint_destroy(queue_pair_entry); } else { qp_free_queue(my_produce_q, produce_size); qp_free_queue(my_consume_q, consume_size); } return result; error_keep_entry: /* This path should only be used when an existing entry was found. */ mutex_unlock(&qp_guest_endpoints.mutex); return result; } /* * The first endpoint issuing a queue pair allocation will create the state * of the queue pair in the queue pair broker. * * If the creator is a guest, it will associate a VMX virtual address range * with the queue pair as specified by the page_store. 
For compatibility with * older VMX'en, that would use a separate step to set the VMX virtual * address range, the virtual address range can be registered later using * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be * used. * * If the creator is the host, a page_store of NULL should be used as well, * since the host is not able to supply a page store for the queue pair. * * For older VMX and host callers, the queue pair will be created in the * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be * created in VMCOQPB_CREATED_MEM state. */ static int qp_broker_create(struct vmci_handle handle, u32 peer, u32 flags, u32 priv_flags, u64 produce_size, u64 consume_size, struct vmci_qp_page_store *page_store, struct vmci_ctx *context, vmci_event_release_cb wakeup_cb, void *client_data, struct qp_broker_entry **ent) { struct qp_broker_entry *entry = NULL; const u32 context_id = vmci_ctx_get_id(context); bool is_local = flags & VMCI_QPFLAG_LOCAL; int result; u64 guest_produce_size; u64 guest_consume_size; /* Do not create if the caller asked not to. */ if (flags & VMCI_QPFLAG_ATTACH_ONLY) return VMCI_ERROR_NOT_FOUND; /* * Creator's context ID should match handle's context ID or the creator * must allow the context in handle's context ID as the "peer". */ if (handle.context != context_id && handle.context != peer) return VMCI_ERROR_NO_ACCESS; if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer)) return VMCI_ERROR_DST_UNREACHABLE; /* * Creator's context ID for local queue pairs should match the * peer, if a peer is specified. */ if (is_local && peer != VMCI_INVALID_ID && context_id != peer) return VMCI_ERROR_NO_ACCESS; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return VMCI_ERROR_NO_MEM; if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) { /* * The queue pair broker entry stores values from the guest * point of view, so a creating host side endpoint should swap * produce and consume values -- unless it is a local queue * pair, in which case no swapping is necessary, since the local * attacher will swap queues. 
*/ guest_produce_size = consume_size; guest_consume_size = produce_size; } else { guest_produce_size = produce_size; guest_consume_size = consume_size; } entry->qp.handle = handle; entry->qp.peer = peer; entry->qp.flags = flags; entry->qp.produce_size = guest_produce_size; entry->qp.consume_size = guest_consume_size; entry->qp.ref_count = 1; entry->create_id = context_id; entry->attach_id = VMCI_INVALID_ID; entry->state = VMCIQPB_NEW; entry->require_trusted_attach = !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED); entry->created_by_trusted = !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED); entry->vmci_page_files = false; entry->wakeup_cb = wakeup_cb; entry->client_data = client_data; entry->produce_q = qp_host_alloc_queue(guest_produce_size); if (entry->produce_q == NULL) { result = VMCI_ERROR_NO_MEM; goto error; } entry->consume_q = qp_host_alloc_queue(guest_consume_size); if (entry->consume_q == NULL) { result = VMCI_ERROR_NO_MEM; goto error; } qp_init_queue_mutex(entry->produce_q, entry->consume_q); INIT_LIST_HEAD(&entry->qp.list_item); if (is_local) { u8 *tmp; entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp), PAGE_SIZE, GFP_KERNEL); if (entry->local_mem == NULL) { result = VMCI_ERROR_NO_MEM; goto error; } entry->state = VMCIQPB_CREATED_MEM; entry->produce_q->q_header = entry->local_mem; tmp = (u8 *)entry->local_mem + PAGE_SIZE * (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1); entry->consume_q->q_header = (struct vmci_queue_header *)tmp; } else if (page_store) { /* * The VMX already initialized the queue pair headers, so no * need for the kernel side to do that. */ result = qp_host_register_user_memory(page_store, entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) goto error; entry->state = VMCIQPB_CREATED_MEM; } else { /* * A create without a page_store may be either a host * side create (in which case we are waiting for the * guest side to supply the memory) or an old style * queue pair create (in which case we will expect a * set page store call as the next step). */ entry->state = VMCIQPB_CREATED_NO_MEM; } qp_list_add_entry(&qp_broker_list, &entry->qp); if (ent != NULL) *ent = entry; /* Add to resource obj */ result = vmci_resource_add(&entry->resource, VMCI_RESOURCE_TYPE_QPAIR_HOST, handle); if (result != VMCI_SUCCESS) { pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", handle.context, handle.resource, result); goto error; } entry->qp.handle = vmci_resource_handle(&entry->resource); if (is_local) { vmci_q_header_init(entry->produce_q->q_header, entry->qp.handle); vmci_q_header_init(entry->consume_q->q_header, entry->qp.handle); } vmci_ctx_qp_create(context, entry->qp.handle); return VMCI_SUCCESS; error: if (entry != NULL) { qp_host_free_queue(entry->produce_q, guest_produce_size); qp_host_free_queue(entry->consume_q, guest_consume_size); kfree(entry); } return result; } /* * Enqueues an event datagram to notify the peer VM attached to * the given queue pair handle about attach/detach event by the * given VM. Returns Payload size of datagram enqueued on * success, error code otherwise. 
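*/

/*
 * Illustrative sketch of the local queue pair layout set up just above: a
 * single zeroed allocation holds the produce header page, the produce data
 * pages, the consume header page and the consume data pages, in that order,
 * so the consume header sits (produce data pages + 1) pages into the buffer.
 * Page size and localmem_sketch_* names are assumptions.
 */
#include <stdint.h>

#define LOCALMEM_SKETCH_PAGE_SIZE 4096ULL       /* assumed page size */

static uint64_t localmem_sketch_div_round_up(uint64_t n, uint64_t d)
{
        return (n + d - 1) / d;
}

/* Byte offset of the consume queue header within the local memory buffer. */
static uint64_t localmem_sketch_consume_header_offset(uint64_t produce_size)
{
        uint64_t produce_data_pages =
                localmem_sketch_div_round_up(produce_size,
                                             LOCALMEM_SKETCH_PAGE_SIZE);

        /* One produce header page, then the produce data pages. */
        return LOCALMEM_SKETCH_PAGE_SIZE * (produce_data_pages + 1);
}

/*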
*/ static int qp_notify_peer(bool attach, struct vmci_handle handle, u32 my_id, u32 peer_id) { int rv; struct vmci_event_qp ev; if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID || peer_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; /* * In vmci_ctx_enqueue_datagram() we enforce the upper limit on * number of pending events from the hypervisor to a given VM * otherwise a rogue VM could do an arbitrary number of attach * and detach operations causing memory pressure in the host * kernel. */ memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); ev.msg.event_data.event = attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; ev.payload.handle = handle; ev.payload.peer_id = my_id; rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, &ev.msg.hdr, false); if (rv < VMCI_SUCCESS) pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n", attach ? "ATTACH" : "DETACH", peer_id); return rv; } /* * The second endpoint issuing a queue pair allocation will attach to * the queue pair registered with the queue pair broker. * * If the attacher is a guest, it will associate a VMX virtual address * range with the queue pair as specified by the page_store. At this * point, the already attach host endpoint may start using the queue * pair, and an attach event is sent to it. For compatibility with * older VMX'en, that used a separate step to set the VMX virtual * address range, the virtual address range can be registered later * using vmci_qp_broker_set_page_store. In that case, a page_store of * NULL should be used, and the attach event will be generated once * the actual page store has been set. * * If the attacher is the host, a page_store of NULL should be used as * well, since the page store information is already set by the guest. * * For new VMX and host callers, the queue pair will be moved to the * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be * moved to the VMCOQPB_ATTACHED_NO_MEM state. */ static int qp_broker_attach(struct qp_broker_entry *entry, u32 peer, u32 flags, u32 priv_flags, u64 produce_size, u64 consume_size, struct vmci_qp_page_store *page_store, struct vmci_ctx *context, vmci_event_release_cb wakeup_cb, void *client_data, struct qp_broker_entry **ent) { const u32 context_id = vmci_ctx_get_id(context); bool is_local = flags & VMCI_QPFLAG_LOCAL; int result; if (entry->state != VMCIQPB_CREATED_NO_MEM && entry->state != VMCIQPB_CREATED_MEM) return VMCI_ERROR_UNAVAILABLE; if (is_local) { if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) || context_id != entry->create_id) { return VMCI_ERROR_INVALID_ARGS; } } else if (context_id == entry->create_id || context_id == entry->attach_id) { return VMCI_ERROR_ALREADY_EXISTS; } if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(entry->create_id)) return VMCI_ERROR_DST_UNREACHABLE; /* * If we are attaching from a restricted context then the queuepair * must have been created by a trusted endpoint. */ if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) && !entry->created_by_trusted) return VMCI_ERROR_NO_ACCESS; /* * If we are attaching to a queuepair that was created by a restricted * context then we must be trusted. 
*/ if (entry->require_trusted_attach && (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED))) return VMCI_ERROR_NO_ACCESS; /* * If the creator specifies VMCI_INVALID_ID in "peer" field, access * control check is not performed. */ if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id) return VMCI_ERROR_NO_ACCESS; if (entry->create_id == VMCI_HOST_CONTEXT_ID) { /* * Do not attach if the caller doesn't support Host Queue Pairs * and a host created this queue pair. */ if (!vmci_ctx_supports_host_qp(context)) return VMCI_ERROR_INVALID_RESOURCE; } else if (context_id == VMCI_HOST_CONTEXT_ID) { struct vmci_ctx *create_context; bool supports_host_qp; /* * Do not attach a host to a user created queue pair if that * user doesn't support host queue pair end points. */ create_context = vmci_ctx_get(entry->create_id); supports_host_qp = vmci_ctx_supports_host_qp(create_context); vmci_ctx_put(create_context); if (!supports_host_qp) return VMCI_ERROR_INVALID_RESOURCE; } if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER)) return VMCI_ERROR_QUEUEPAIR_MISMATCH; if (context_id != VMCI_HOST_CONTEXT_ID) { /* * The queue pair broker entry stores values from the guest * point of view, so an attaching guest should match the values * stored in the entry. */ if (entry->qp.produce_size != produce_size || entry->qp.consume_size != consume_size) { return VMCI_ERROR_QUEUEPAIR_MISMATCH; } } else if (entry->qp.produce_size != consume_size || entry->qp.consume_size != produce_size) { return VMCI_ERROR_QUEUEPAIR_MISMATCH; } if (context_id != VMCI_HOST_CONTEXT_ID) { /* * If a guest attached to a queue pair, it will supply * the backing memory. If this is a pre NOVMVM vmx, * the backing memory will be supplied by calling * vmci_qp_broker_set_page_store() following the * return of the vmci_qp_broker_alloc() call. If it is * a vmx of version NOVMVM or later, the page store * must be supplied as part of the * vmci_qp_broker_alloc call. Under all circumstances * must the initially created queue pair not have any * memory associated with it already. */ if (entry->state != VMCIQPB_CREATED_NO_MEM) return VMCI_ERROR_INVALID_ARGS; if (page_store != NULL) { /* * Patch up host state to point to guest * supplied memory. The VMX already * initialized the queue pair headers, so no * need for the kernel side to do that. */ result = qp_host_register_user_memory(page_store, entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) return result; entry->state = VMCIQPB_ATTACHED_MEM; } else { entry->state = VMCIQPB_ATTACHED_NO_MEM; } } else if (entry->state == VMCIQPB_CREATED_NO_MEM) { /* * The host side is attempting to attach to a queue * pair that doesn't have any memory associated with * it. This must be a pre NOVMVM vmx that hasn't set * the page store information yet, or a quiesced VM. */ return VMCI_ERROR_UNAVAILABLE; } else { /* The host side has successfully attached to a queue pair. */ entry->state = VMCIQPB_ATTACHED_MEM; } if (entry->state == VMCIQPB_ATTACHED_MEM) { result = qp_notify_peer(true, entry->qp.handle, context_id, entry->create_id); if (result < VMCI_SUCCESS) pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", entry->create_id, entry->qp.handle.context, entry->qp.handle.resource); } entry->attach_id = context_id; entry->qp.ref_count++; if (wakeup_cb) { entry->wakeup_cb = wakeup_cb; entry->client_data = client_data; } /* * When attaching to local queue pairs, the context already has * an entry tracking the queue pair, so don't add another one. 
*/ if (!is_local) vmci_ctx_qp_create(context, entry->qp.handle); if (ent != NULL) *ent = entry; return VMCI_SUCCESS; } /* * queue_pair_Alloc for use when setting up queue pair endpoints * on the host. */ static int qp_broker_alloc(struct vmci_handle handle, u32 peer, u32 flags, u32 priv_flags, u64 produce_size, u64 consume_size, struct vmci_qp_page_store *page_store, struct vmci_ctx *context, vmci_event_release_cb wakeup_cb, void *client_data, struct qp_broker_entry **ent, bool *swap) { const u32 context_id = vmci_ctx_get_id(context); bool create; struct qp_broker_entry *entry = NULL; bool is_local = flags & VMCI_QPFLAG_LOCAL; int result; if (vmci_handle_is_invalid(handle) || (flags & ~VMCI_QP_ALL_FLAGS) || is_local || !(produce_size || consume_size) || !context || context_id == VMCI_INVALID_ID || handle.context == VMCI_INVALID_ID) { return VMCI_ERROR_INVALID_ARGS; } if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store)) return VMCI_ERROR_INVALID_ARGS; /* * In the initial argument check, we ensure that non-vmkernel hosts * are not allowed to create local queue pairs. */ mutex_lock(&qp_broker_list.mutex); if (!is_local && vmci_ctx_qp_exists(context, handle)) { pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n", context_id, handle.context, handle.resource); mutex_unlock(&qp_broker_list.mutex); return VMCI_ERROR_ALREADY_EXISTS; } if (handle.resource != VMCI_INVALID_ID) entry = qp_broker_handle_to_entry(handle); if (!entry) { create = true; result = qp_broker_create(handle, peer, flags, priv_flags, produce_size, consume_size, page_store, context, wakeup_cb, client_data, ent); } else { create = false; result = qp_broker_attach(entry, peer, flags, priv_flags, produce_size, consume_size, page_store, context, wakeup_cb, client_data, ent); } mutex_unlock(&qp_broker_list.mutex); if (swap) *swap = (context_id == VMCI_HOST_CONTEXT_ID) && !(create && is_local); return result; } /* * This function implements the kernel API for allocating a queue * pair. */ static int qp_alloc_host_work(struct vmci_handle *handle, struct vmci_queue **produce_q, u64 produce_size, struct vmci_queue **consume_q, u64 consume_size, u32 peer, u32 flags, u32 priv_flags, vmci_event_release_cb wakeup_cb, void *client_data) { struct vmci_handle new_handle; struct vmci_ctx *context; struct qp_broker_entry *entry; int result; bool swap; if (vmci_handle_is_invalid(*handle)) { new_handle = vmci_make_handle( VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID); } else new_handle = *handle; context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); entry = NULL; result = qp_broker_alloc(new_handle, peer, flags, priv_flags, produce_size, consume_size, NULL, context, wakeup_cb, client_data, &entry, &swap); if (result == VMCI_SUCCESS) { if (swap) { /* * If this is a local queue pair, the attacher * will swap around produce and consume * queues. */ *produce_q = entry->consume_q; *consume_q = entry->produce_q; } else { *produce_q = entry->produce_q; *consume_q = entry->consume_q; } *handle = vmci_resource_handle(&entry->resource); } else { *handle = VMCI_INVALID_HANDLE; pr_devel("queue pair broker failed to alloc (result=%d)\n", result); } vmci_ctx_put(context); return result; } /* * Allocates a VMCI queue_pair. Only checks validity of input * arguments. The real work is done in the host or guest * specific function. 
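*/

/*
 * Sketch (hypothetical names) of the orientation rule applied by
 * qp_alloc_host_work above: the broker stores queues from the guest's point
 * of view, so a host endpoint normally swaps produce and consume, except when
 * the host itself is creating a local queue pair.
 */
#include <stdbool.h>

struct orient_sketch_qp {
        void *produce_q;
        void *consume_q;
};

static bool orient_sketch_needs_swap(bool caller_is_host, bool create,
                                     bool is_local)
{
        return caller_is_host && !(create && is_local);
}

static void orient_sketch_apply(struct orient_sketch_qp *qp, bool needs_swap)
{
        if (needs_swap) {
                void *tmp = qp->produce_q;

                qp->produce_q = qp->consume_q;
                qp->consume_q = tmp;
        }
}

/*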
*/ int vmci_qp_alloc(struct vmci_handle *handle, struct vmci_queue **produce_q, u64 produce_size, struct vmci_queue **consume_q, u64 consume_size, u32 peer, u32 flags, u32 priv_flags, bool guest_endpoint, vmci_event_release_cb wakeup_cb, void *client_data) { if (!handle || !produce_q || !consume_q || (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) return VMCI_ERROR_INVALID_ARGS; if (guest_endpoint) { return qp_alloc_guest_work(handle, produce_q, produce_size, consume_q, consume_size, peer, flags, priv_flags); } else { return qp_alloc_host_work(handle, produce_q, produce_size, consume_q, consume_size, peer, flags, priv_flags, wakeup_cb, client_data); } } /* * This function implements the host kernel API for detaching from * a queue pair. */ static int qp_detatch_host_work(struct vmci_handle handle) { int result; struct vmci_ctx *context; context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); result = vmci_qp_broker_detach(handle, context); vmci_ctx_put(context); return result; } /* * Detaches from a VMCI queue_pair. Only checks validity of input argument. * Real work is done in the host or guest specific function. */ static int qp_detatch(struct vmci_handle handle, bool guest_endpoint) { if (vmci_handle_is_invalid(handle)) return VMCI_ERROR_INVALID_ARGS; if (guest_endpoint) return qp_detatch_guest_work(handle); else return qp_detatch_host_work(handle); } /* * Returns the entry from the head of the list. Assumes that the list is * locked. */ static struct qp_entry *qp_list_get_head(struct qp_list *qp_list) { if (!list_empty(&qp_list->head)) { struct qp_entry *entry = list_first_entry(&qp_list->head, struct qp_entry, list_item); return entry; } return NULL; } void vmci_qp_broker_exit(void) { struct qp_entry *entry; struct qp_broker_entry *be; mutex_lock(&qp_broker_list.mutex); while ((entry = qp_list_get_head(&qp_broker_list))) { be = (struct qp_broker_entry *)entry; qp_list_remove_entry(&qp_broker_list, entry); kfree(be); } mutex_unlock(&qp_broker_list.mutex); } /* * Requests that a queue pair be allocated with the VMCI queue * pair broker. Allocates a queue pair entry if one does not * exist. Attaches to one if it exists, and retrieves the page * files backing that queue_pair. Assumes that the queue pair * broker lock is held. */ int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer, u32 flags, u32 priv_flags, u64 produce_size, u64 consume_size, struct vmci_qp_page_store *page_store, struct vmci_ctx *context) { if (!QP_SIZES_ARE_VALID(produce_size, consume_size)) return VMCI_ERROR_NO_RESOURCES; return qp_broker_alloc(handle, peer, flags, priv_flags, produce_size, consume_size, page_store, context, NULL, NULL, NULL, NULL); } /* * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate * step to add the UVAs of the VMX mapping of the queue pair. This function * provides backwards compatibility with such VMX'en, and takes care of * registering the page store for a queue pair previously allocated by the * VMX during create or attach. This function will move the queue pair state * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the * attached state with memory, the queue pair is ready to be used by the * host peer, and an attached event will be generated. * * Assumes that the queue pair broker lock is held. * * This function is only used by the hosted platform, since there is no * issue with backwards compatibility for vmkernel. 
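*/

/*
 * Userspace sketch of the shutdown pattern used by vmci_qp_broker_exit above:
 * with the list lock held, entries are popped from the head and freed until
 * the list is empty. The singly linked list below is only a stand-in for the
 * kernel's list_head; all drain_sketch_* names are assumptions.
 */
#include <pthread.h>
#include <stdlib.h>

struct drain_sketch_entry {
        struct drain_sketch_entry *next;
};

struct drain_sketch_list {
        struct drain_sketch_entry *head;
        pthread_mutex_t lock;
};

static void drain_sketch_exit(struct drain_sketch_list *list)
{
        struct drain_sketch_entry *entry;

        pthread_mutex_lock(&list->lock);
        while ((entry = list->head) != NULL) {
                list->head = entry->next;       /* unlink the head entry */
                free(entry);
        }
        pthread_mutex_unlock(&list->lock);
}

/*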
*/ int vmci_qp_broker_set_page_store(struct vmci_handle handle, u64 produce_uva, u64 consume_uva, struct vmci_ctx *context) { struct qp_broker_entry *entry; int result; const u32 context_id = vmci_ctx_get_id(context); if (vmci_handle_is_invalid(handle) || !context || context_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; /* * We only support guest to host queue pairs, so the VMX must * supply UVAs for the mapped page files. */ if (produce_uva == 0 || consume_uva == 0) return VMCI_ERROR_INVALID_ARGS; mutex_lock(&qp_broker_list.mutex); if (!vmci_ctx_qp_exists(context, handle)) { pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } entry = qp_broker_handle_to_entry(handle); if (!entry) { result = VMCI_ERROR_NOT_FOUND; goto out; } /* * If I'm the owner then I can set the page store. * * Or, if a host created the queue_pair and I'm the attached peer * then I can set the page store. */ if (entry->create_id != context_id && (entry->create_id != VMCI_HOST_CONTEXT_ID || entry->attach_id != context_id)) { result = VMCI_ERROR_QUEUEPAIR_NOTOWNER; goto out; } if (entry->state != VMCIQPB_CREATED_NO_MEM && entry->state != VMCIQPB_ATTACHED_NO_MEM) { result = VMCI_ERROR_UNAVAILABLE; goto out; } result = qp_host_get_user_memory(produce_uva, consume_uva, entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) goto out; result = qp_host_map_queues(entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) { qp_host_unregister_user_memory(entry->produce_q, entry->consume_q); goto out; } if (entry->state == VMCIQPB_CREATED_NO_MEM) entry->state = VMCIQPB_CREATED_MEM; else entry->state = VMCIQPB_ATTACHED_MEM; entry->vmci_page_files = true; if (entry->state == VMCIQPB_ATTACHED_MEM) { result = qp_notify_peer(true, handle, context_id, entry->create_id); if (result < VMCI_SUCCESS) { pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", entry->create_id, entry->qp.handle.context, entry->qp.handle.resource); } } result = VMCI_SUCCESS; out: mutex_unlock(&qp_broker_list.mutex); return result; } /* * Resets saved queue headers for the given QP broker * entry. Should be used when guest memory becomes available * again, or the guest detaches. */ static void qp_reset_saved_headers(struct qp_broker_entry *entry) { entry->produce_q->saved_header = NULL; entry->consume_q->saved_header = NULL; } /* * The main entry point for detaching from a queue pair registered with the * queue pair broker. If more than one endpoint is attached to the queue * pair, the first endpoint will mainly decrement a reference count and * generate a notification to its peer. The last endpoint will clean up * the queue pair state registered with the broker. * * When a guest endpoint detaches, it will unmap and unregister the guest * memory backing the queue pair. If the host is still attached, it will * no longer be able to access the queue pair content. * * If the queue pair is already in a state where there is no memory * registered for the queue pair (any *_NO_MEM state), it will transition to * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest * endpoint is the first of two endpoints to detach. If the host endpoint is * the first out of two to detach, the queue pair will move to the * VMCIQPB_SHUTDOWN_MEM state. 
*/ int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context) { struct qp_broker_entry *entry; const u32 context_id = vmci_ctx_get_id(context); u32 peer_id; bool is_local = false; int result; if (vmci_handle_is_invalid(handle) || !context || context_id == VMCI_INVALID_ID) { return VMCI_ERROR_INVALID_ARGS; } mutex_lock(&qp_broker_list.mutex); if (!vmci_ctx_qp_exists(context, handle)) { pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } entry = qp_broker_handle_to_entry(handle); if (!entry) { pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } if (context_id != entry->create_id && context_id != entry->attach_id) { result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; goto out; } if (context_id == entry->create_id) { peer_id = entry->attach_id; entry->create_id = VMCI_INVALID_ID; } else { peer_id = entry->create_id; entry->attach_id = VMCI_INVALID_ID; } entry->qp.ref_count--; is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL; if (context_id != VMCI_HOST_CONTEXT_ID) { bool headers_mapped; /* * Pre NOVMVM vmx'en may detach from a queue pair * before setting the page store, and in that case * there is no user memory to detach from. Also, more * recent VMX'en may detach from a queue pair in the * quiesced state. */ qp_acquire_queue_mutex(entry->produce_q); headers_mapped = entry->produce_q->q_header || entry->consume_q->q_header; if (QPBROKERSTATE_HAS_MEM(entry)) { result = qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID, entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", handle.context, handle.resource, result); qp_host_unregister_user_memory(entry->produce_q, entry->consume_q); } if (!headers_mapped) qp_reset_saved_headers(entry); qp_release_queue_mutex(entry->produce_q); if (!headers_mapped && entry->wakeup_cb) entry->wakeup_cb(entry->client_data); } else { if (entry->wakeup_cb) { entry->wakeup_cb = NULL; entry->client_data = NULL; } } if (entry->qp.ref_count == 0) { qp_list_remove_entry(&qp_broker_list, &entry->qp); if (is_local) kfree(entry->local_mem); qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); qp_host_free_queue(entry->produce_q, entry->qp.produce_size); qp_host_free_queue(entry->consume_q, entry->qp.consume_size); /* Unlink from resource hash table and free callback */ vmci_resource_remove(&entry->resource); kfree(entry); vmci_ctx_qp_destroy(context, handle); } else { qp_notify_peer(false, handle, context_id, peer_id); if (context_id == VMCI_HOST_CONTEXT_ID && QPBROKERSTATE_HAS_MEM(entry)) { entry->state = VMCIQPB_SHUTDOWN_MEM; } else { entry->state = VMCIQPB_SHUTDOWN_NO_MEM; } if (!is_local) vmci_ctx_qp_destroy(context, handle); } result = VMCI_SUCCESS; out: mutex_unlock(&qp_broker_list.mutex); return result; } /* * Establishes the necessary mappings for a queue pair given a * reference to the queue pair guest memory. This is usually * called when a guest is unquiesced and the VMX is allowed to * map guest memory once again. 
*/ int vmci_qp_broker_map(struct vmci_handle handle, struct vmci_ctx *context, u64 guest_mem) { struct qp_broker_entry *entry; const u32 context_id = vmci_ctx_get_id(context); int result; if (vmci_handle_is_invalid(handle) || !context || context_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; mutex_lock(&qp_broker_list.mutex); if (!vmci_ctx_qp_exists(context, handle)) { pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } entry = qp_broker_handle_to_entry(handle); if (!entry) { pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } if (context_id != entry->create_id && context_id != entry->attach_id) { result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; goto out; } result = VMCI_SUCCESS; if (context_id != VMCI_HOST_CONTEXT_ID && !QPBROKERSTATE_HAS_MEM(entry)) { struct vmci_qp_page_store page_store; page_store.pages = guest_mem; page_store.len = QPE_NUM_PAGES(entry->qp); qp_acquire_queue_mutex(entry->produce_q); qp_reset_saved_headers(entry); result = qp_host_register_user_memory(&page_store, entry->produce_q, entry->consume_q); qp_release_queue_mutex(entry->produce_q); if (result == VMCI_SUCCESS) { /* Move state from *_NO_MEM to *_MEM */ entry->state++; if (entry->wakeup_cb) entry->wakeup_cb(entry->client_data); } } out: mutex_unlock(&qp_broker_list.mutex); return result; } /* * Saves a snapshot of the queue headers for the given QP broker * entry. Should be used when guest memory is unmapped. * Results: * VMCI_SUCCESS on success, appropriate error code if guest memory * can't be accessed.. */ static int qp_save_headers(struct qp_broker_entry *entry) { int result; if (entry->produce_q->saved_header != NULL && entry->consume_q->saved_header != NULL) { /* * If the headers have already been saved, we don't need to do * it again, and we don't want to map in the headers * unnecessarily. */ return VMCI_SUCCESS; } if (NULL == entry->produce_q->q_header || NULL == entry->consume_q->q_header) { result = qp_host_map_queues(entry->produce_q, entry->consume_q); if (result < VMCI_SUCCESS) return result; } memcpy(&entry->saved_produce_q, entry->produce_q->q_header, sizeof(entry->saved_produce_q)); entry->produce_q->saved_header = &entry->saved_produce_q; memcpy(&entry->saved_consume_q, entry->consume_q->q_header, sizeof(entry->saved_consume_q)); entry->consume_q->saved_header = &entry->saved_consume_q; return VMCI_SUCCESS; } /* * Removes all references to the guest memory of a given queue pair, and * will move the queue pair from state *_MEM to *_NO_MEM. It is usually * called when a VM is being quiesced where access to guest memory should * avoided. 
*/ int vmci_qp_broker_unmap(struct vmci_handle handle, struct vmci_ctx *context, u32 gid) { struct qp_broker_entry *entry; const u32 context_id = vmci_ctx_get_id(context); int result; if (vmci_handle_is_invalid(handle) || !context || context_id == VMCI_INVALID_ID) return VMCI_ERROR_INVALID_ARGS; mutex_lock(&qp_broker_list.mutex); if (!vmci_ctx_qp_exists(context, handle)) { pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } entry = qp_broker_handle_to_entry(handle); if (!entry) { pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", context_id, handle.context, handle.resource); result = VMCI_ERROR_NOT_FOUND; goto out; } if (context_id != entry->create_id && context_id != entry->attach_id) { result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; goto out; } if (context_id != VMCI_HOST_CONTEXT_ID && QPBROKERSTATE_HAS_MEM(entry)) { qp_acquire_queue_mutex(entry->produce_q); result = qp_save_headers(entry); if (result < VMCI_SUCCESS) pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", handle.context, handle.resource, result); qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q); /* * On hosted, when we unmap queue pairs, the VMX will also * unmap the guest memory, so we invalidate the previously * registered memory. If the queue pair is mapped again at a * later point in time, we will need to reregister the user * memory with a possibly new user VA. */ qp_host_unregister_user_memory(entry->produce_q, entry->consume_q); /* * Move state from *_MEM to *_NO_MEM. */ entry->state--; qp_release_queue_mutex(entry->produce_q); } result = VMCI_SUCCESS; out: mutex_unlock(&qp_broker_list.mutex); return result; } /* * Destroys all guest queue pair endpoints. If active guest queue * pairs still exist, hypercalls to attempt detach from these * queue pairs will be made. Any failure to detach is silently * ignored. */ void vmci_qp_guest_endpoints_exit(void) { struct qp_entry *entry; struct qp_guest_endpoint *ep; mutex_lock(&qp_guest_endpoints.mutex); while ((entry = qp_list_get_head(&qp_guest_endpoints))) { ep = (struct qp_guest_endpoint *)entry; /* Don't make a hypercall for local queue_pairs. */ if (!(entry->flags & VMCI_QPFLAG_LOCAL)) qp_detatch_hypercall(entry->handle); /* We cannot fail the exit, so let's reset ref_count. */ entry->ref_count = 0; qp_list_remove_entry(&qp_guest_endpoints, entry); qp_guest_endpoint_destroy(ep); } mutex_unlock(&qp_guest_endpoints.mutex); } /* * Helper routine that will lock the queue pair before subsequent * operations. * Note: Non-blocking on the host side is currently only implemented in ESX. * Since non-blocking isn't yet implemented on the host personality we * have no reason to acquire a spin lock. So to avoid the use of an * unnecessary lock only acquire the mutex if we can block. */ static void qp_lock(const struct vmci_qp *qpair) { qp_acquire_queue_mutex(qpair->produce_q); } /* * Helper routine that unlocks the queue pair after calling * qp_lock. */ static void qp_unlock(const struct vmci_qp *qpair) { qp_release_queue_mutex(qpair->produce_q); } /* * The queue headers may not be mapped at all times. If a queue is * currently not mapped, it will be attempted to do so. 
*/ static int qp_map_queue_headers(struct vmci_queue *produce_q, struct vmci_queue *consume_q) { int result; if (NULL == produce_q->q_header || NULL == consume_q->q_header) { result = qp_host_map_queues(produce_q, consume_q); if (result < VMCI_SUCCESS) return (produce_q->saved_header && consume_q->saved_header) ? VMCI_ERROR_QUEUEPAIR_NOT_READY : VMCI_ERROR_QUEUEPAIR_NOTATTACHED; } return VMCI_SUCCESS; } /* * Helper routine that will retrieve the produce and consume * headers of a given queue pair. If the guest memory of the * queue pair is currently not available, the saved queue headers * will be returned, if these are available. */ static int qp_get_queue_headers(const struct vmci_qp *qpair, struct vmci_queue_header **produce_q_header, struct vmci_queue_header **consume_q_header) { int result; result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q); if (result == VMCI_SUCCESS) { *produce_q_header = qpair->produce_q->q_header; *consume_q_header = qpair->consume_q->q_header; } else if (qpair->produce_q->saved_header && qpair->consume_q->saved_header) { *produce_q_header = qpair->produce_q->saved_header; *consume_q_header = qpair->consume_q->saved_header; result = VMCI_SUCCESS; } return result; } /* * Callback from VMCI queue pair broker indicating that a queue * pair that was previously not ready, now either is ready or * gone forever. */ static int qp_wakeup_cb(void *client_data) { struct vmci_qp *qpair = (struct vmci_qp *)client_data; qp_lock(qpair); while (qpair->blocked > 0) { qpair->blocked--; qpair->generation++; wake_up(&qpair->event); } qp_unlock(qpair); return VMCI_SUCCESS; } /* * Makes the calling thread wait for the queue pair to become * ready for host side access. Returns true when thread is * woken up after queue pair state change, false otherwise. */ static bool qp_wait_for_ready_queue(struct vmci_qp *qpair) { unsigned int generation; qpair->blocked++; generation = qpair->generation; qp_unlock(qpair); wait_event(qpair->event, generation != qpair->generation); qp_lock(qpair); return true; } /* * Enqueues a given buffer to the produce queue using the provided * function. As many bytes as possible (space available in the queue) * are enqueued. Assumes the queue->mutex has been acquired. Returns * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if * an error occured when accessing the buffer, * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't * available. Otherwise, the number of bytes written to the queue is * returned. Updates the tail pointer of the produce queue. */ static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q, struct vmci_queue *consume_q, const u64 produce_q_size, struct iov_iter *from) { s64 free_space; u64 tail; size_t buf_size = iov_iter_count(from); size_t written; ssize_t result; result = qp_map_queue_headers(produce_q, consume_q); if (unlikely(result != VMCI_SUCCESS)) return result; free_space = vmci_q_header_free_space(produce_q->q_header, consume_q->q_header, produce_q_size); if (free_space == 0) return VMCI_ERROR_QUEUEPAIR_NOSPACE; if (free_space < VMCI_SUCCESS) return (ssize_t) free_space; written = (size_t) (free_space > buf_size ? buf_size : free_space); tail = vmci_q_header_producer_tail(produce_q->q_header); if (likely(tail + written < produce_q_size)) { result = qp_memcpy_to_queue_iter(produce_q, tail, from, written); } else { /* Tail pointer wraps around. 
*/ const size_t tmp = (size_t) (produce_q_size - tail); result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp); if (result >= VMCI_SUCCESS) result = qp_memcpy_to_queue_iter(produce_q, 0, from, written - tmp); } if (result < VMCI_SUCCESS) return result; /* * This virt_wmb() ensures that data written to the queue * is observable before the new producer_tail is. */ virt_wmb(); vmci_q_header_add_producer_tail(produce_q->q_header, written, produce_q_size); return written; } /* * Dequeues data (if available) from the given consume queue. Writes data * to the user provided buffer using the provided function. * Assumes the queue->mutex has been acquired. * Results: * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue * (as defined by the queue size). * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer. * Otherwise the number of bytes dequeued is returned. * Side effects: * Updates the head pointer of the consume queue. */ static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q, struct vmci_queue *consume_q, const u64 consume_q_size, struct iov_iter *to, bool update_consumer) { size_t buf_size = iov_iter_count(to); s64 buf_ready; u64 head; size_t read; ssize_t result; result = qp_map_queue_headers(produce_q, consume_q); if (unlikely(result != VMCI_SUCCESS)) return result; buf_ready = vmci_q_header_buf_ready(consume_q->q_header, produce_q->q_header, consume_q_size); if (buf_ready == 0) return VMCI_ERROR_QUEUEPAIR_NODATA; if (buf_ready < VMCI_SUCCESS) return (ssize_t) buf_ready; /* * This virt_rmb() ensures that data from the queue will be read * after we have determined how much is ready to be consumed. */ virt_rmb(); read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready); head = vmci_q_header_consumer_head(produce_q->q_header); if (likely(head + read < consume_q_size)) { result = qp_memcpy_from_queue_iter(to, consume_q, head, read); } else { /* Head pointer wraps around. */ const size_t tmp = (size_t) (consume_q_size - head); result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp); if (result >= VMCI_SUCCESS) result = qp_memcpy_from_queue_iter(to, consume_q, 0, read - tmp); } if (result < VMCI_SUCCESS) return result; if (update_consumer) vmci_q_header_add_consumer_head(produce_q->q_header, read, consume_q_size); return read; } /* * vmci_qpair_alloc() - Allocates a queue pair. * @qpair: Pointer for the new vmci_qp struct. * @handle: Handle to track the resource. * @produce_qsize: Desired size of the producer queue. * @consume_qsize: Desired size of the consumer queue. * @peer: ContextID of the peer. * @flags: VMCI flags. * @priv_flags: VMCI priviledge flags. * * This is the client interface for allocating the memory for a * vmci_qp structure and then attaching to the underlying * queue. If an error occurs allocating the memory for the * vmci_qp structure no attempt is made to attach. If an * error occurs attaching, then the structure is freed. */ int vmci_qpair_alloc(struct vmci_qp **qpair, struct vmci_handle *handle, u64 produce_qsize, u64 consume_qsize, u32 peer, u32 flags, u32 priv_flags) { struct vmci_qp *my_qpair; int retval; struct vmci_handle src = VMCI_INVALID_HANDLE; struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID); enum vmci_route route; vmci_event_release_cb wakeup_cb; void *client_data; /* * Restrict the size of a queuepair. The device already * enforces a limit on the total amount of memory that can be * allocated to queuepairs for a guest. 
However, we try to * allocate this memory before we make the queuepair * allocation hypercall. On Linux, we allocate each page * separately, which means rather than fail, the guest will * thrash while it tries to allocate, and will become * increasingly unresponsive to the point where it appears to * be hung. So we place a limit on the size of an individual * queuepair here, and leave the device to enforce the * restriction on total queuepair memory. (Note that this * doesn't prevent all cases; a user with only this much * physical memory could still get into trouble.) The error * used by the device is NO_RESOURCES, so use that here too. */ if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize)) return VMCI_ERROR_NO_RESOURCES; retval = vmci_route(&src, &dst, false, &route); if (retval < VMCI_SUCCESS) route = vmci_guest_code_active() ? VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST; if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) { pr_devel("NONBLOCK OR PINNED set"); return VMCI_ERROR_INVALID_ARGS; } my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL); if (!my_qpair) return VMCI_ERROR_NO_MEM; my_qpair->produce_q_size = produce_qsize; my_qpair->consume_q_size = consume_qsize; my_qpair->peer = peer; my_qpair->flags = flags; my_qpair->priv_flags = priv_flags; wakeup_cb = NULL; client_data = NULL; if (VMCI_ROUTE_AS_HOST == route) { my_qpair->guest_endpoint = false; if (!(flags & VMCI_QPFLAG_LOCAL)) { my_qpair->blocked = 0; my_qpair->generation = 0; init_waitqueue_head(&my_qpair->event); wakeup_cb = qp_wakeup_cb; client_data = (void *)my_qpair; } } else { my_qpair->guest_endpoint = true; } retval = vmci_qp_alloc(handle, &my_qpair->produce_q, my_qpair->produce_q_size, &my_qpair->consume_q, my_qpair->consume_q_size, my_qpair->peer, my_qpair->flags, my_qpair->priv_flags, my_qpair->guest_endpoint, wakeup_cb, client_data); if (retval < VMCI_SUCCESS) { kfree(my_qpair); return retval; } *qpair = my_qpair; my_qpair->handle = *handle; return retval; } EXPORT_SYMBOL_GPL(vmci_qpair_alloc); /* * vmci_qpair_detach() - Detatches the client from a queue pair. * @qpair: Reference of a pointer to the qpair struct. * * This is the client interface for detaching from a VMCIQPair. * Note that this routine will free the memory allocated for the * vmci_qp structure too. */ int vmci_qpair_detach(struct vmci_qp **qpair) { int result; struct vmci_qp *old_qpair; if (!qpair || !(*qpair)) return VMCI_ERROR_INVALID_ARGS; old_qpair = *qpair; result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint); /* * The guest can fail to detach for a number of reasons, and * if it does so, it will cleanup the entry (if there is one). * The host can fail too, but it won't cleanup the entry * immediately, it will do that later when the context is * freed. Either way, we need to release the qpair struct * here; there isn't much the caller can do, and we don't want * to leak. */ memset(old_qpair, 0, sizeof(*old_qpair)); old_qpair->handle = VMCI_INVALID_HANDLE; old_qpair->peer = VMCI_INVALID_ID; kfree(old_qpair); *qpair = NULL; return result; } EXPORT_SYMBOL_GPL(vmci_qpair_detach); /* * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer. * @qpair: Pointer to the queue pair struct. * @producer_tail: Reference used for storing producer tail index. * @consumer_head: Reference used for storing the consumer head index. * * This is the client interface for getting the current indexes of the * QPair from the point of the view of the caller as the producer. 
*/ int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair, u64 *producer_tail, u64 *consumer_head) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; int result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) vmci_q_header_get_pointers(produce_q_header, consume_q_header, producer_tail, consumer_head); qp_unlock(qpair); if (result == VMCI_SUCCESS && ((producer_tail && *producer_tail >= qpair->produce_q_size) || (consumer_head && *consumer_head >= qpair->produce_q_size))) return VMCI_ERROR_INVALID_SIZE; return result; } EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes); /* * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer. * @qpair: Pointer to the queue pair struct. * @consumer_tail: Reference used for storing consumer tail index. * @producer_head: Reference used for storing the producer head index. * * This is the client interface for getting the current indexes of the * QPair from the point of the view of the caller as the consumer. */ int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair, u64 *consumer_tail, u64 *producer_head) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; int result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) vmci_q_header_get_pointers(consume_q_header, produce_q_header, consumer_tail, producer_head); qp_unlock(qpair); if (result == VMCI_SUCCESS && ((consumer_tail && *consumer_tail >= qpair->consume_q_size) || (producer_head && *producer_head >= qpair->consume_q_size))) return VMCI_ERROR_INVALID_SIZE; return result; } EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes); /* * vmci_qpair_produce_free_space() - Retrieves free space in producer queue. * @qpair: Pointer to the queue pair struct. * * This is the client interface for getting the amount of free * space in the QPair from the point of the view of the caller as * the producer which is the common case. Returns < 0 if err, else * available bytes into which data can be enqueued if > 0. */ s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; s64 result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) result = vmci_q_header_free_space(produce_q_header, consume_q_header, qpair->produce_q_size); else result = 0; qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space); /* * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue. * @qpair: Pointer to the queue pair struct. * * This is the client interface for getting the amount of free * space in the QPair from the point of the view of the caller as * the consumer which is not the common case. Returns < 0 if err, else * available bytes into which data can be enqueued if > 0. 
*/ s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; s64 result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) result = vmci_q_header_free_space(consume_q_header, produce_q_header, qpair->consume_q_size); else result = 0; qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space); /* * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from * producer queue. * @qpair: Pointer to the queue pair struct. * * This is the client interface for getting the amount of * enqueued data in the QPair from the point of the view of the * caller as the producer which is not the common case. Returns < 0 if err, * else available bytes that may be read. */ s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; s64 result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) result = vmci_q_header_buf_ready(produce_q_header, consume_q_header, qpair->produce_q_size); else result = 0; qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready); /* * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from * consumer queue. * @qpair: Pointer to the queue pair struct. * * This is the client interface for getting the amount of * enqueued data in the QPair from the point of the view of the * caller as the consumer which is the normal case. Returns < 0 if err, * else available bytes that may be read. */ s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair) { struct vmci_queue_header *produce_q_header; struct vmci_queue_header *consume_q_header; s64 result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); result = qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); if (result == VMCI_SUCCESS) result = vmci_q_header_buf_ready(consume_q_header, produce_q_header, qpair->consume_q_size); else result = 0; qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready); /* * vmci_qpair_enqueue() - Throw data on the queue. * @qpair: Pointer to the queue pair struct. * @buf: Pointer to buffer containing data * @buf_size: Length of buffer. * @buf_type: Buffer type (Unused). * * This is the client interface for enqueueing data into the queue. * Returns number of bytes enqueued or < 0 on error. */ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, const void *buf, size_t buf_size, int buf_type) { ssize_t result; struct iov_iter from; struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size}; if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; iov_iter_kvec(&from, ITER_SOURCE, &v, 1, buf_size); qp_lock(qpair); do { result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q, qpair->produce_q_size, &from); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_enqueue); /* * vmci_qpair_dequeue() - Get data from the queue. * @qpair: Pointer to the queue pair struct. * @buf: Pointer to buffer for the data * @buf_size: Length of buffer. * @buf_type: Buffer type (Unused). 
* * This is the client interface for dequeueing data from the queue. * Returns number of bytes dequeued or < 0 on error. */ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, void *buf, size_t buf_size, int buf_type) { ssize_t result; struct iov_iter to; struct kvec v = {.iov_base = buf, .iov_len = buf_size}; if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; iov_iter_kvec(&to, ITER_DEST, &v, 1, buf_size); qp_lock(qpair); do { result = qp_dequeue_locked(qpair->produce_q, qpair->consume_q, qpair->consume_q_size, &to, true); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_dequeue); /* * vmci_qpair_peek() - Peek at the data in the queue. * @qpair: Pointer to the queue pair struct. * @buf: Pointer to buffer for the data * @buf_size: Length of buffer. * @buf_type: Buffer type (Unused on Linux). * * This is the client interface for peeking into a queue. (I.e., * copy data from the queue without updating the head pointer.) * Returns number of bytes dequeued or < 0 on error. */ ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, int buf_type) { struct iov_iter to; struct kvec v = {.iov_base = buf, .iov_len = buf_size}; ssize_t result; if (!qpair || !buf) return VMCI_ERROR_INVALID_ARGS; iov_iter_kvec(&to, ITER_DEST, &v, 1, buf_size); qp_lock(qpair); do { result = qp_dequeue_locked(qpair->produce_q, qpair->consume_q, qpair->consume_q_size, &to, false); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_peek); /* * vmci_qpair_enquev() - Throw data on the queue using iov. * @qpair: Pointer to the queue pair struct. * @iov: Pointer to buffer containing data * @iov_size: Length of buffer. * @buf_type: Buffer type (Unused). * * This is the client interface for enqueueing data into the queue. * This function uses IO vectors to handle the work. Returns number * of bytes enqueued or < 0 on error. */ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); do { result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q, qpair->produce_q_size, &msg->msg_iter); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_enquev); /* * vmci_qpair_dequev() - Get data from the queue using iov. * @qpair: Pointer to the queue pair struct. * @iov: Pointer to buffer for the data * @iov_size: Length of buffer. * @buf_type: Buffer type (Unused). * * This is the client interface for dequeueing data from the queue. * This function uses IO vectors to handle the work. Returns number * of bytes dequeued or < 0 on error. 
*/ ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); do { result = qp_dequeue_locked(qpair->produce_q, qpair->consume_q, qpair->consume_q_size, &msg->msg_iter, true); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_dequev); /* * vmci_qpair_peekv() - Peek at the data in the queue using iov. * @qpair: Pointer to the queue pair struct. * @iov: Pointer to buffer for the data * @iov_size: Length of buffer. * @buf_type: Buffer type (Unused on Linux). * * This is the client interface for peeking into a queue. (I.e., * copy data from the queue without updating the head pointer.) * This function uses IO vectors to handle the work. Returns number * of bytes peeked or < 0 on error. */ ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); do { result = qp_dequeue_locked(qpair->produce_q, qpair->consume_q, qpair->consume_q_size, &msg->msg_iter, false); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && !qp_wait_for_ready_queue(qpair)) result = VMCI_ERROR_WOULD_BLOCK; } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); qp_unlock(qpair); return result; } EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
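/*
 * Illustrative sketch only (not part of this file): a minimal kernel-side
 * user of the vmci_qpair_* client API exported above. The peer context id,
 * queue sizes and buffers are hypothetical and error handling is trimmed;
 * this shows the call sequence, not a definitive implementation.
 */
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>

static int example_vmci_qpair_roundtrip(u32 peer_cid)
{
	struct vmci_handle handle = VMCI_INVALID_HANDLE;
	struct vmci_qp *qpair = NULL;
	const char msg[] = "hello";
	char reply[64];
	ssize_t n;
	int rc;

	/* Allocate/attach a queue pair with 4 KiB produce and consume queues. */
	rc = vmci_qpair_alloc(&qpair, &handle, 4096, 4096, peer_cid,
			      0 /* flags */, VMCI_NO_PRIVILEGE_FLAGS);
	if (rc < VMCI_SUCCESS)
		return rc;

	/*
	 * Enqueue returns the number of bytes written or a negative error
	 * (e.g. VMCI_ERROR_QUEUEPAIR_NOSPACE when the produce queue is full).
	 */
	n = vmci_qpair_enqueue(qpair, msg, sizeof(msg), 0);
	if (n < 0)
		goto detach;

	/*
	 * Dequeue returns bytes read, or VMCI_ERROR_QUEUEPAIR_NODATA when
	 * the peer has produced nothing yet.
	 */
	n = vmci_qpair_dequeue(qpair, reply, sizeof(reply), 0);

detach:
	/* Detaching also frees the vmci_qp structure, as documented above. */
	vmci_qpair_detach(&qpair);
	return n < 0 ? (int)n : 0;
}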
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2015 Patrick McHardy <kaber@trash.net> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> struct nft_dynset { struct nft_set *set; struct nft_set_ext_tmpl tmpl; enum nft_dynset_ops op:8; u8 sreg_key; u8 sreg_data; bool invert; bool expr; u8 num_exprs; u64 timeout; struct nft_expr *expr_array[NFT_SET_EXPR_MAX]; struct nft_set_binding binding; }; static int nft_dynset_expr_setup(const struct nft_dynset *priv, const struct nft_set_ext *ext) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); struct nft_expr *expr; int i; for (i = 0; i < priv->num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0) return -1; elem_expr->size += priv->expr_array[i]->ops->size; } return 0; } static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set_ext *ext; void *elem_priv; u64 timeout; if (!atomic_add_unless(&set->nelems, 1, set->size)) return NULL; timeout = priv->timeout ?
: set->timeout; elem_priv = nft_set_elem_init(set, &priv->tmpl, &regs->data[priv->sreg_key], NULL, &regs->data[priv->sreg_data], timeout, 0, GFP_ATOMIC); if (IS_ERR(elem_priv)) goto err1; ext = nft_set_elem_ext(set, elem_priv); if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0) goto err2; return elem_priv; err2: nft_set_elem_destroy(set, elem_priv, false); err1: if (set->size) atomic_dec(&set->nelems); return NULL; } void nft_dynset_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; const struct nft_set_ext *ext; u64 timeout; if (priv->op == NFT_DYNSET_OP_DELETE) { set->ops->delete(set, &regs->data[priv->sreg_key]); return; } if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { if (priv->op == NFT_DYNSET_OP_UPDATE && nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { timeout = priv->timeout ? : set->timeout; *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout; } nft_set_elem_update_expr(ext, regs, pkt); if (priv->invert) regs->verdict.code = NFT_BREAK; return; } if (!priv->invert) regs->verdict.code = NFT_BREAK; } static void nft_dynset_ext_add_expr(struct nft_dynset *priv) { u8 size = 0; int i; for (i = 0; i < priv->num_exprs; i++) size += priv->expr_array[i]->ops->size; nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPRESSIONS, sizeof(struct nft_set_elem_expr) + size); } static struct nft_expr * nft_dynset_expr_alloc(const struct nft_ctx *ctx, const struct nft_set *set, const struct nlattr *attr, int pos) { struct nft_expr *expr; int err; expr = nft_set_elem_expr_alloc(ctx, set, attr); if (IS_ERR(expr)) return expr; if (set->exprs[pos] && set->exprs[pos]->ops != expr->ops) { err = -EOPNOTSUPP; goto err_dynset_expr; } return expr; err_dynset_expr: nft_expr_destroy(ctx, expr); return ERR_PTR(err); } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, [NFTA_DYNSET_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, [NFTA_DYNSET_EXPRESSIONS] = { .type = NLA_NESTED }, }; static int nft_dynset_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); struct nft_dynset *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err, i; lockdep_assert_held(&nft_net->commit_mutex); if (tb[NFTA_DYNSET_SET_NAME] == NULL || tb[NFTA_DYNSET_OP] == NULL || tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; if (tb[NFTA_DYNSET_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR)) return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; if (flags & NFT_DYNSET_F_EXPR) priv->expr = true; } set = nft_set_lookup_global(ctx->net, ctx->table, tb[NFTA_DYNSET_SET_NAME], tb[NFTA_DYNSET_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (set->flags & NFT_SET_OBJECT) return -EOPNOTSUPP; if (set->ops->update == NULL) return -EOPNOTSUPP; if (set->flags & NFT_SET_CONSTANT) return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); if (priv->op > 
NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); if (err) return err; } err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, set->klen); if (err < 0) return err; if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { if (!(set->flags & NFT_SET_MAP)) return -EOPNOTSUPP; if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], &priv->sreg_data, set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) return -EINVAL; if ((tb[NFTA_DYNSET_EXPR] || tb[NFTA_DYNSET_EXPRESSIONS]) && !(set->flags & NFT_SET_EVAL)) return -EINVAL; if (tb[NFTA_DYNSET_EXPR]) { struct nft_expr *dynset_expr; dynset_expr = nft_dynset_expr_alloc(ctx, set, tb[NFTA_DYNSET_EXPR], 0); if (IS_ERR(dynset_expr)) return PTR_ERR(dynset_expr); priv->num_exprs++; priv->expr_array[0] = dynset_expr; if (set->num_exprs > 1 || (set->num_exprs == 1 && dynset_expr->ops != set->exprs[0]->ops)) { err = -EOPNOTSUPP; goto err_expr_free; } } else if (tb[NFTA_DYNSET_EXPRESSIONS]) { struct nft_expr *dynset_expr; struct nlattr *tmp; int left; if (!priv->expr) return -EINVAL; i = 0; nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; goto err_expr_free; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; goto err_expr_free; } dynset_expr = nft_dynset_expr_alloc(ctx, set, tmp, i); if (IS_ERR(dynset_expr)) { err = PTR_ERR(dynset_expr); goto err_expr_free; } priv->expr_array[i] = dynset_expr; priv->num_exprs++; if (set->num_exprs) { if (i >= set->num_exprs) { err = -EINVAL; goto err_expr_free; } if (dynset_expr->ops != set->exprs[i]->ops) { err = -EOPNOTSUPP; goto err_expr_free; } } i++; } if (set->num_exprs && set->num_exprs != i) { err = -EOPNOTSUPP; goto err_expr_free; } } else if (set->num_exprs > 0) { err = nft_set_elem_expr_clone(ctx, set, priv->expr_array); if (err < 0) return err; priv->num_exprs = set->num_exprs; } nft_set_ext_prepare(&priv->tmpl); nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); if (set->flags & NFT_SET_MAP) nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); if (priv->num_exprs) nft_dynset_ext_add_expr(priv); if (set->flags & NFT_SET_TIMEOUT) { if (timeout || set->timeout) { nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT); nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); } } priv->timeout = timeout; err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) goto err_expr_free; if (set->size == 0) set->size = 0xffff; priv->set = set; return 0; err_expr_free: for (i = 0; i < priv->num_exprs; i++) nft_expr_destroy(ctx, priv->expr_array[i]); return err; } static void nft_dynset_deactivate(const struct nft_ctx *ctx, const struct nft_expr *expr, enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr); nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); } static void nft_dynset_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_dynset *priv = nft_expr_priv(expr); nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_dynset *priv = nft_expr_priv(expr); int i; for (i = 0; i < priv->num_exprs; i++) nft_expr_destroy(ctx, priv->expr_array[i]); nf_tables_destroy_set(ctx, priv->set); } static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr 
*expr, bool reset) { const struct nft_dynset *priv = nft_expr_priv(expr); u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0; int i; if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) goto nla_put_failure; if (priv->set->flags & NFT_SET_MAP && nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, priv->sreg_data)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op))) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->set->num_exprs == 0) { if (priv->num_exprs == 1) { if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr_array[0], reset)) goto nla_put_failure; } else if (priv->num_exprs > 1) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS); if (!nest) goto nla_put_failure; for (i = 0; i < priv->num_exprs; i++) { if (nft_expr_dump(skb, NFTA_LIST_ELEM, priv->expr_array[i], reset)) goto nla_put_failure; } nla_nest_end(skb, nest); } } if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static const struct nft_expr_ops nft_dynset_ops = { .type = &nft_dynset_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_dynset_type __read_mostly = { .name = "dynset", .ops = &nft_dynset_ops, .policy = nft_dynset_policy, .maxattr = NFTA_DYNSET_MAX, .owner = THIS_MODULE, };
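/*
 * Illustrative sketch only: nft_dynset_type above is registered by
 * nf_tables_core together with the other built-in expression types. If an
 * expression type like this lived in its own module, it would be wired up
 * with nft_register_expr()/nft_unregister_expr(), as sketched below (the
 * example_* function names are hypothetical).
 */
static int __init example_dynset_module_init(void)
{
	/* Make the "dynset" expression available to nft rules. */
	return nft_register_expr(&nft_dynset_type);
}

static void __exit example_dynset_module_exit(void)
{
	nft_unregister_expr(&nft_dynset_type);
}

module_init(example_dynset_module_init);
module_exit(example_dynset_module_exit);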
// SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for NXP PN533 NFC Chip - USB transport layer * * Copyright (C) 2011 Instituto Nokia de Tecnologia * Copyright (C) 2012-2013 Tieto Poland */ #include <linux/device.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/nfc.h> #include <linux/netdevice.h> #include <net/nfc/nfc.h> #include "pn533.h" #define VERSION "0.1" #define PN533_VENDOR_ID 0x4CC #define PN533_PRODUCT_ID 0x2533 #define SCM_VENDOR_ID 0x4E6 #define SCL3711_PRODUCT_ID 0x5591 #define SONY_VENDOR_ID 0x054c #define PASORI_PRODUCT_ID 0x02e1 #define ACS_VENDOR_ID 0x072f #define ACR122U_PRODUCT_ID 0x2200 static const struct usb_device_id pn533_usb_table[] = { { USB_DEVICE(PN533_VENDOR_ID, PN533_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SCM_VENDOR_ID, SCL3711_PRODUCT_ID), .driver_info = PN533_DEVICE_STD }, { USB_DEVICE(SONY_VENDOR_ID, PASORI_PRODUCT_ID), .driver_info = PN533_DEVICE_PASORI }, { USB_DEVICE(ACS_VENDOR_ID,
ACR122U_PRODUCT_ID), .driver_info = PN533_DEVICE_ACR122U }, { } }; MODULE_DEVICE_TABLE(usb, pn533_usb_table); struct pn533_usb_phy { struct usb_device *udev; struct usb_interface *interface; struct urb *out_urb; struct urb *in_urb; struct urb *ack_urb; u8 *ack_buffer; struct pn533 *priv; }; static void pn533_recv_response(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct sk_buff *skb = NULL; if (!urb->status) { skb = alloc_skb(urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&phy->udev->dev, "failed to alloc memory\n"); } else { skb_put_data(skb, urb->transfer_buffer, urb->actual_length); } } pn533_recv_frame(phy->priv, skb, urb->status); } static int pn533_submit_urb_for_response(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_response; return usb_submit_urb(phy->in_urb, flags); } static void pn533_recv_ack(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; struct pn533 *priv = phy->priv; struct pn533_cmd *cmd = priv->cmd; struct pn533_std_frame *in_frame; int rc; cmd->status = urb->status; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); goto sched_wq; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); goto sched_wq; } in_frame = phy->in_urb->transfer_buffer; if (!pn533_rx_frame_is_ack(in_frame)) { nfc_err(&phy->udev->dev, "Received an invalid ack\n"); cmd->status = -EIO; goto sched_wq; } rc = pn533_submit_urb_for_response(phy, GFP_ATOMIC); if (rc) { nfc_err(&phy->udev->dev, "usb_submit_urb failed with result %d\n", rc); cmd->status = rc; goto sched_wq; } return; sched_wq: queue_work(priv->wq, &priv->cmd_complete_work); } static int pn533_submit_urb_for_ack(struct pn533_usb_phy *phy, gfp_t flags) { phy->in_urb->complete = pn533_recv_ack; return usb_submit_urb(phy->in_urb, flags); } static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; static const u8 ack[6] = {0x00, 0x00, 0xff, 0x00, 0xff, 0x00}; /* spec 7.1.1.3: Preamble, SoPC (2), ACK Code (2), Postamble */ if (!phy->ack_buffer) { phy->ack_buffer = kmemdup(ack, sizeof(ack), flags); if (!phy->ack_buffer) return -ENOMEM; } phy->ack_urb->transfer_buffer = phy->ack_buffer; phy->ack_urb->transfer_buffer_length = sizeof(ack); return usb_submit_urb(phy->ack_urb, flags); } struct pn533_out_arg { struct pn533_usb_phy *phy; struct completion done; }; static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; struct pn533_out_arg arg; void *cntx; int rc; if (phy->priv == NULL) phy->priv = dev; phy->out_urb->transfer_buffer = out->data; phy->out_urb->transfer_buffer_length = out->len; print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); arg.phy = phy; init_completion(&arg.done); cntx = phy->out_urb->context; phy->out_urb->context = &arg; rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; wait_for_completion(&arg.done); phy->out_urb->context = cntx; if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); if (rc) goto error; } else if (dev->protocol_type == PN533_PROTO_REQ_ACK_RESP) { /* request for ACK if that's the case */ rc = pn533_submit_urb_for_ack(phy, GFP_KERNEL); if (rc) goto error; } return 0; error: usb_unlink_urb(phy->out_urb); return rc; } static void 
pn533_usb_abort_cmd(struct pn533 *dev, gfp_t flags) { struct pn533_usb_phy *phy = dev->phy; /* ACR122U does not support any command which aborts last * issued command i.e. as ACK for standard PN533. Additionally, * it behaves stange, sending broken or incorrect responses, * when we cancel urb before the chip will send response. */ if (dev->device_type == PN533_DEVICE_ACR122U) return; /* An ack will cancel the last issued command */ pn533_usb_send_ack(dev, flags); /* cancel the urb request */ usb_kill_urb(phy->in_urb); } /* ACR122 specific structs and functions */ /* ACS ACR122 pn533 frame definitions */ #define PN533_ACR122_TX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_tx_frame) \ + 2) #define PN533_ACR122_TX_FRAME_TAIL_LEN 0 #define PN533_ACR122_RX_FRAME_HEADER_LEN (sizeof(struct pn533_acr122_rx_frame) \ + 2) #define PN533_ACR122_RX_FRAME_TAIL_LEN 2 #define PN533_ACR122_FRAME_MAX_PAYLOAD_LEN PN533_STD_FRAME_MAX_PAYLOAD_LEN /* CCID messages types */ #define PN533_ACR122_PC_TO_RDR_ICCPOWERON 0x62 #define PN533_ACR122_PC_TO_RDR_ESCAPE 0x6B #define PN533_ACR122_RDR_TO_PC_ESCAPE 0x83 struct pn533_acr122_ccid_hdr { u8 type; u32 datalen; u8 slot; u8 seq; /* * 3 msg specific bytes or status, error and 1 specific * byte for reposnse msg */ u8 params[3]; u8 data[]; /* payload */ } __packed; struct pn533_acr122_apdu_hdr { u8 class; u8 ins; u8 p1; u8 p2; } __packed; struct pn533_acr122_tx_frame { struct pn533_acr122_ccid_hdr ccid; struct pn533_acr122_apdu_hdr apdu; u8 datalen; u8 data[]; /* pn533 frame: TFI ... */ } __packed; struct pn533_acr122_rx_frame { struct pn533_acr122_ccid_hdr ccid; u8 data[]; /* pn533 frame : TFI ... */ } __packed; static void pn533_acr122_tx_frame_init(void *_frame, u8 cmd_code) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.type = PN533_ACR122_PC_TO_RDR_ESCAPE; /* sizeof(apdu_hdr) + sizeof(datalen) */ frame->ccid.datalen = sizeof(frame->apdu) + 1; frame->ccid.slot = 0; frame->ccid.seq = 0; frame->ccid.params[0] = 0; frame->ccid.params[1] = 0; frame->ccid.params[2] = 0; frame->data[0] = PN533_STD_FRAME_DIR_OUT; frame->data[1] = cmd_code; frame->datalen = 2; /* data[0] + data[1] */ frame->apdu.class = 0xFF; frame->apdu.ins = 0; frame->apdu.p1 = 0; frame->apdu.p2 = 0; } static void pn533_acr122_tx_frame_finish(void *_frame) { struct pn533_acr122_tx_frame *frame = _frame; frame->ccid.datalen += frame->datalen; } static void pn533_acr122_tx_update_payload_len(void *_frame, int len) { struct pn533_acr122_tx_frame *frame = _frame; frame->datalen += len; } static bool pn533_acr122_is_rx_frame_valid(void *_frame, struct pn533 *dev) { struct pn533_acr122_rx_frame *frame = _frame; if (frame->ccid.type != 0x83) return false; if (!frame->ccid.datalen) return false; if (frame->data[frame->ccid.datalen - 2] == 0x63) return false; return true; } static int pn533_acr122_rx_frame_size(void *frame) { struct pn533_acr122_rx_frame *f = frame; /* f->ccid.datalen already includes tail length */ return sizeof(struct pn533_acr122_rx_frame) + f->ccid.datalen; } static u8 pn533_acr122_get_cmd_code(void *frame) { struct pn533_acr122_rx_frame *f = frame; return PN533_FRAME_CMD(f); } static struct pn533_frame_ops pn533_acr122_frame_ops = { .tx_frame_init = pn533_acr122_tx_frame_init, .tx_frame_finish = pn533_acr122_tx_frame_finish, .tx_update_payload_len = pn533_acr122_tx_update_payload_len, .tx_header_len = PN533_ACR122_TX_FRAME_HEADER_LEN, .tx_tail_len = PN533_ACR122_TX_FRAME_TAIL_LEN, .rx_is_frame_valid = pn533_acr122_is_rx_frame_valid, .rx_header_len = 
PN533_ACR122_RX_FRAME_HEADER_LEN, .rx_tail_len = PN533_ACR122_RX_FRAME_TAIL_LEN, .rx_frame_size = pn533_acr122_rx_frame_size, .max_payload_len = PN533_ACR122_FRAME_MAX_PAYLOAD_LEN, .get_cmd_code = pn533_acr122_get_cmd_code, }; struct pn533_acr122_poweron_rdr_arg { int rc; struct completion done; }; static void pn533_acr122_poweron_rdr_resp(struct urb *urb) { struct pn533_acr122_poweron_rdr_arg *arg = urb->context; print_hex_dump_debug("ACR122 RX: ", DUMP_PREFIX_NONE, 16, 1, urb->transfer_buffer, urb->transfer_buffer_length, false); arg->rc = urb->status; complete(&arg->done); } static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) { /* Power on the reader (CCID cmd) */ u8 cmd[10] = {PN533_ACR122_PC_TO_RDR_ICCPOWERON, 0, 0, 0, 0, 0, 0, 3, 0, 0}; char *buffer; int transferred; int rc; void *cntx; struct pn533_acr122_poweron_rdr_arg arg; buffer = kmemdup(cmd, sizeof(cmd), GFP_KERNEL); if (!buffer) return -ENOMEM; init_completion(&arg.done); cntx = phy->in_urb->context; /* backup context */ phy->in_urb->complete = pn533_acr122_poweron_rdr_resp; phy->in_urb->context = &arg; print_hex_dump_debug("ACR122 TX: ", DUMP_PREFIX_NONE, 16, 1, cmd, sizeof(cmd), false); rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd), &transferred, 5000); kfree(buffer); if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, "Reader power on cmd error %d\n", rc); return rc; } rc = usb_submit_urb(phy->in_urb, GFP_KERNEL); if (rc) { nfc_err(&phy->udev->dev, "Can't submit reader poweron cmd response %d\n", rc); return rc; } wait_for_completion(&arg.done); phy->in_urb->context = cntx; /* restore context */ return arg.rc; } static void pn533_out_complete(struct urb *urb) { struct pn533_out_arg *arg = urb->context; struct pn533_usb_phy *phy = arg->phy; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } complete(&arg->done); } static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; switch (urb->status) { case 0: break; /* success */ case -ECONNRESET: case -ENOENT: dev_dbg(&phy->udev->dev, "The urb has been stopped (status %d)\n", urb->status); break; case -ESHUTDOWN: default: nfc_err(&phy->udev->dev, "Urb failure (status %d)\n", urb->status); } } static const struct pn533_phy_ops usb_phy_ops = { .send_frame = pn533_usb_send_frame, .send_ack = pn533_usb_send_ack, .abort_cmd = pn533_usb_abort_cmd, }; static int pn533_usb_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct pn533 *priv; struct pn533_usb_phy *phy; struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; int in_endpoint = 0; int out_endpoint = 0; int rc = -ENOMEM; int i; u32 protocols; enum pn533_protocol_type protocol_type = PN533_PROTO_REQ_ACK_RESP; struct pn533_frame_ops *fops = NULL; unsigned char *in_buf; int in_buf_len = PN533_EXT_FRAME_HEADER_LEN + PN533_STD_FRAME_MAX_PAYLOAD_LEN + PN533_STD_FRAME_TAIL_LEN; phy = devm_kzalloc(&interface->dev, sizeof(*phy), GFP_KERNEL); if (!phy) return -ENOMEM; in_buf = kzalloc(in_buf_len, GFP_KERNEL); if (!in_buf) return -ENOMEM; phy->udev = usb_get_dev(interface_to_usbdev(interface)); phy->interface = interface; iface_desc = interface->cur_altsetting; for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (!in_endpoint &&
usb_endpoint_is_bulk_in(endpoint)) in_endpoint = endpoint->bEndpointAddress; if (!out_endpoint && usb_endpoint_is_bulk_out(endpoint)) out_endpoint = endpoint->bEndpointAddress; } if (!in_endpoint || !out_endpoint) { nfc_err(&interface->dev, "Could not find bulk-in or bulk-out endpoint\n"); rc = -ENODEV; goto error; } phy->in_urb = usb_alloc_urb(0, GFP_KERNEL); phy->out_urb = usb_alloc_urb(0, GFP_KERNEL); phy->ack_urb = usb_alloc_urb(0, GFP_KERNEL); if (!phy->in_urb || !phy->out_urb || !phy->ack_urb) goto error; usb_fill_bulk_urb(phy->in_urb, phy->udev, usb_rcvbulkpipe(phy->udev, in_endpoint), in_buf, in_buf_len, NULL, phy); usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: protocols = PN533_ALL_PROTOCOLS; break; case PN533_DEVICE_PASORI: protocols = PN533_NO_TYPE_B_PROTOCOLS; break; case PN533_DEVICE_ACR122U: protocols = PN533_NO_TYPE_B_PROTOCOLS; fops = &pn533_acr122_frame_ops; protocol_type = PN533_PROTO_REQ_RESP; rc = pn533_acr122_poweron_rdr(phy); if (rc < 0) { nfc_err(&interface->dev, "Couldn't poweron the reader (error %d)\n", rc); goto error; } break; default: nfc_err(&interface->dev, "Unknown device type %lu\n", id->driver_info); rc = -EINVAL; goto error; } priv = pn53x_common_init(id->driver_info, protocol_type, phy, &usb_phy_ops, fops, &phy->udev->dev); if (IS_ERR(priv)) { rc = PTR_ERR(priv); goto error; } phy->priv = priv; rc = pn533_finalize_setup(priv); if (rc) goto err_clean; usb_set_intfdata(interface, phy); rc = pn53x_register_nfc(priv, protocols, &interface->dev); if (rc) goto err_clean; return 0; err_clean: pn53x_common_clean(priv); error: usb_kill_urb(phy->in_urb); usb_kill_urb(phy->out_urb); usb_kill_urb(phy->ack_urb); usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); kfree(phy->ack_buffer); return rc; } static void pn533_usb_disconnect(struct usb_interface *interface) { struct pn533_usb_phy *phy = usb_get_intfdata(interface); if (!phy) return; pn53x_unregister_nfc(phy->priv); pn53x_common_clean(phy->priv); usb_set_intfdata(interface, NULL); usb_kill_urb(phy->in_urb); usb_kill_urb(phy->out_urb); usb_kill_urb(phy->ack_urb); kfree(phy->in_urb->transfer_buffer); usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); kfree(phy->ack_buffer); nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n"); } static struct usb_driver pn533_usb_driver = { .name = "pn533_usb", .probe = pn533_usb_probe, .disconnect = pn533_usb_disconnect, .id_table = pn533_usb_table, }; module_usb_driver(pn533_usb_driver); MODULE_AUTHOR("Lauro Ramos Venancio <lauro.venancio@openbossa.org>"); MODULE_AUTHOR("Aloisio Almeida Jr <aloisio.almeida@openbossa.org>"); MODULE_AUTHOR("Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>"); MODULE_DESCRIPTION("PN533 USB driver ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL");
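/*
 * Editor's note (illustration only, not part of the driver): pn533_usb_send_ack()
 * above transmits the fixed 6-byte ACK frame from the PN533 spec (7.1.1.3:
 * Preamble, Start-of-Packet Code, ACK Code, Postamble), and pn533_usb_abort_cmd()
 * relies on that ACK to cancel the last issued command. The user-space sketch
 * below only reproduces those bytes; looks_like_pn533_ack() is a made-up helper
 * for this example -- the real validation is done by pn533_rx_frame_is_ack() in
 * the shared pn533 core, which is not shown in this dump.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same bytes as the "ack" array built in pn533_usb_send_ack(). */
static const uint8_t pn533_ack_frame[6] = { 0x00, 0x00, 0xff, 0x00, 0xff, 0x00 };

/* Illustrative check only: compare a received buffer against the ACK bytes. */
static bool looks_like_pn533_ack(const uint8_t *buf, size_t len)
{
	return len >= sizeof(pn533_ack_frame) &&
	       memcmp(buf, pn533_ack_frame, sizeof(pn533_ack_frame)) == 0;
}

int main(void)
{
	uint8_t rx[6];

	/* Pretend the chip answered with an ACK. */
	memcpy(rx, pn533_ack_frame, sizeof(rx));
	printf("ack? %s\n", looks_like_pn533_ack(rx, sizeof(rx)) ? "yes" : "no");
	return 0;
}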
// SPDX-License-Identifier: GPL-2.0-only /* * scsi_sysfs.c * * SCSI sysfs interface routines. * * Created to pull SCSI mid layer sysfs routines into one file. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/device.h> #include <linux/pm_runtime.h> #include <linux/bsg.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_tcq.h> #include <scsi/scsi_dh.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_devinfo.h> #include "scsi_priv.h" #include "scsi_logging.h" static const struct device_type scsi_dev_type; static const struct { enum scsi_device_state value; char *name; } sdev_states[] = { { SDEV_CREATED, "created" }, { SDEV_RUNNING, "running" }, { SDEV_CANCEL, "cancel" }, { SDEV_DEL, "deleted" }, { SDEV_QUIESCE, "quiesce" }, { SDEV_OFFLINE, "offline" }, { SDEV_TRANSPORT_OFFLINE, "transport-offline" }, { SDEV_BLOCK, "blocked" }, { SDEV_CREATED_BLOCK, "created-blocked" }, }; const char *scsi_device_state_name(enum scsi_device_state state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { if (sdev_states[i].value == state) { name = sdev_states[i].name; break; } } return name; } static const struct { enum scsi_host_state value; char *name; } shost_states[] = { { SHOST_CREATED, "created" }, { SHOST_RUNNING, "running" }, { SHOST_CANCEL, "cancel" }, { SHOST_DEL, "deleted" }, { SHOST_RECOVERY, "recovery" }, { SHOST_CANCEL_RECOVERY, "cancel/recovery" }, { SHOST_DEL_RECOVERY, "deleted/recovery", }, }; const char *scsi_host_state_name(enum scsi_host_state state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(shost_states); i++) { if (shost_states[i].value == state) { name = shost_states[i].name; break; } } return name; } #ifdef CONFIG_SCSI_DH static const struct { unsigned char value; char *name; } sdev_access_states[] = { { SCSI_ACCESS_STATE_OPTIMAL, "active/optimized" }, { SCSI_ACCESS_STATE_ACTIVE, "active/non-optimized" }, { SCSI_ACCESS_STATE_STANDBY, "standby" }, { SCSI_ACCESS_STATE_UNAVAILABLE, "unavailable" }, { SCSI_ACCESS_STATE_LBA, "lba-dependent" }, { SCSI_ACCESS_STATE_OFFLINE, "offline" }, { SCSI_ACCESS_STATE_TRANSITIONING, "transitioning" }, }; static const char *scsi_access_state_name(unsigned char state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(sdev_access_states); i++) { if (sdev_access_states[i].value == state) { name = sdev_access_states[i].name; break; } } return name; } #endif static int check_set(unsigned long long *val, char *src) { char *last; if (strcmp(src, "-") == 0) { *val = SCAN_WILD_CARD; } else { /* * Doesn't check for int overflow */ *val = simple_strtoull(src, &last, 0); if (*last != '\0') return 1; } return 0; } static int scsi_scan(struct Scsi_Host *shost, const char *str) { char s1[15], s2[15], s3[17], junk; unsigned long long channel, id, lun; int res; res = sscanf(str, "%10s %10s %16s %c", s1, s2, s3, &junk); if (res != 3) return -EINVAL; if (check_set(&channel, s1)) return -EINVAL; if (check_set(&id, s2)) return -EINVAL; if (check_set(&lun, s3)) return -EINVAL; if (shost->transportt->user_scan) res =
shost->transportt->user_scan(shost, channel, id, lun); else res = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); return res; } /* * shost_show_function: macro to create an attr function that can be used to * show a non-bit field. */ #define shost_show_function(name, field, format_string) \ static ssize_t \ show_##name (struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct Scsi_Host *shost = class_to_shost(dev); \ return snprintf (buf, 20, format_string, shost->field); \ } /* * shost_rd_attr: macro to create a function and attribute variable for a * read only field. */ #define shost_rd_attr2(name, field, format_string) \ shost_show_function(name, field, format_string) \ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL); #define shost_rd_attr(field, format_string) \ shost_rd_attr2(field, field, format_string) /* * Create the actual show/store functions and data structures. */ static ssize_t store_scan(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); int res; res = scsi_scan(shost, buf); if (res == 0) res = count; return res; }; static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan); static ssize_t store_shost_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int i; struct Scsi_Host *shost = class_to_shost(dev); enum scsi_host_state state = 0; for (i = 0; i < ARRAY_SIZE(shost_states); i++) { const int len = strlen(shost_states[i].name); if (strncmp(shost_states[i].name, buf, len) == 0 && buf[len] == '\n') { state = shost_states[i].value; break; } } if (!state) return -EINVAL; if (scsi_host_set_state(shost, state)) return -EINVAL; return count; } static ssize_t show_shost_state(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); const char *name = scsi_host_state_name(shost->shost_state); if (!name) return -EINVAL; return snprintf(buf, 20, "%s\n", name); } /* DEVICE_ATTR(state) clashes with dev_attr_state for sdev */ static struct device_attribute dev_attr_hstate = __ATTR(state, S_IRUGO | S_IWUSR, show_shost_state, store_shost_state); static ssize_t show_shost_mode(unsigned int mode, char *buf) { ssize_t len = 0; if (mode & MODE_INITIATOR) len = sprintf(buf, "%s", "Initiator"); if (mode & MODE_TARGET) len += sprintf(buf + len, "%s%s", len ? 
", " : "", "Target"); len += sprintf(buf + len, "\n"); return len; } static ssize_t show_shost_supported_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); unsigned int supported_mode = shost->hostt->supported_mode; if (supported_mode == MODE_UNKNOWN) /* by default this should be initiator */ supported_mode = MODE_INITIATOR; return show_shost_mode(supported_mode, buf); } static DEVICE_ATTR(supported_mode, S_IRUGO | S_IWUSR, show_shost_supported_mode, NULL); static ssize_t show_shost_active_mode(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); if (shost->active_mode == MODE_UNKNOWN) return snprintf(buf, 20, "unknown\n"); else return show_shost_mode(shost->active_mode, buf); } static DEVICE_ATTR(active_mode, S_IRUGO | S_IWUSR, show_shost_active_mode, NULL); static int check_reset_type(const char *str) { if (sysfs_streq(str, "adapter")) return SCSI_ADAPTER_RESET; else if (sysfs_streq(str, "firmware")) return SCSI_FIRMWARE_RESET; else return 0; } static ssize_t store_host_reset(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); const struct scsi_host_template *sht = shost->hostt; int ret = -EINVAL; int type; type = check_reset_type(buf); if (!type) goto exit_store_host_reset; if (sht->host_reset) ret = sht->host_reset(shost, type); else ret = -EOPNOTSUPP; exit_store_host_reset: if (ret == 0) ret = count; return ret; } static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset); static ssize_t show_shost_eh_deadline(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); if (shost->eh_deadline == -1) return snprintf(buf, strlen("off") + 2, "off\n"); return sprintf(buf, "%u\n", shost->eh_deadline / HZ); } static ssize_t store_shost_eh_deadline(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); int ret = -EINVAL; unsigned long deadline, flags; if (shost->transportt && (shost->transportt->eh_strategy_handler || !shost->hostt->eh_host_reset_handler)) return ret; if (!strncmp(buf, "off", strlen("off"))) deadline = -1; else { ret = kstrtoul(buf, 10, &deadline); if (ret) return ret; if (deadline * HZ > UINT_MAX) return -EINVAL; } spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_in_recovery(shost)) ret = -EBUSY; else { if (deadline == -1) shost->eh_deadline = -1; else shost->eh_deadline = deadline * HZ; ret = count; } spin_unlock_irqrestore(shost->host_lock, flags); return ret; } static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline); shost_rd_attr(unique_id, "%u\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%d\n"); shost_rd_attr(sg_tablesize, "%hu\n"); shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); shost_rd_attr2(proc_name, hostt->proc_name, "%s\n"); static ssize_t show_host_busy(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); return snprintf(buf, 20, "%d\n", scsi_host_busy(shost)); } static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL); static ssize_t show_use_blk_mq(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "1\n"); } static DEVICE_ATTR(use_blk_mq, S_IRUGO, show_use_blk_mq, NULL); static ssize_t 
show_nr_hw_queues(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); struct blk_mq_tag_set *tag_set = &shost->tag_set; return snprintf(buf, 20, "%d\n", tag_set->nr_hw_queues); } static DEVICE_ATTR(nr_hw_queues, S_IRUGO, show_nr_hw_queues, NULL); static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_use_blk_mq.attr, &dev_attr_unique_id.attr, &dev_attr_host_busy.attr, &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, &dev_attr_sg_prot_tablesize.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, &dev_attr_hstate.attr, &dev_attr_supported_mode.attr, &dev_attr_active_mode.attr, &dev_attr_prot_capabilities.attr, &dev_attr_prot_guard_type.attr, &dev_attr_host_reset.attr, &dev_attr_eh_deadline.attr, &dev_attr_nr_hw_queues.attr, NULL }; static const struct attribute_group scsi_shost_attr_group = { .attrs = scsi_sysfs_shost_attrs, }; const struct attribute_group *scsi_shost_groups[] = { &scsi_shost_attr_group, NULL }; static void scsi_device_cls_release(struct device *class_dev) { struct scsi_device *sdev; sdev = class_to_sdev(class_dev); put_device(&sdev->sdev_gendev); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdev = to_scsi_device(dev); struct device *parent; struct list_head *this, *tmp; struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL; struct scsi_vpd *vpd_pgb7 = NULL; unsigned long flags; might_sleep(); scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; spin_lock_irqsave(sdev->host->host_lock, flags); list_del(&sdev->siblings); list_del(&sdev->same_target_siblings); list_del(&sdev->starved_entry); spin_unlock_irqrestore(sdev->host->host_lock, flags); cancel_work_sync(&sdev->event_work); list_for_each_safe(this, tmp, &sdev->event_list) { struct scsi_event *evt; evt = list_entry(this, struct scsi_event, node); list_del(&evt->node); kfree(evt); } blk_put_queue(sdev->request_queue); /* NULL queue means the device can't be used */ sdev->request_queue = NULL; sbitmap_free(&sdev->budget_map); mutex_lock(&sdev->inquiry_mutex); vpd_pg0 = rcu_replace_pointer(sdev->vpd_pg0, vpd_pg0, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg80 = rcu_replace_pointer(sdev->vpd_pg80, vpd_pg80, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg83 = rcu_replace_pointer(sdev->vpd_pg83, vpd_pg83, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pg89 = rcu_replace_pointer(sdev->vpd_pg89, vpd_pg89, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb0 = rcu_replace_pointer(sdev->vpd_pgb0, vpd_pgb0, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb1 = rcu_replace_pointer(sdev->vpd_pgb1, vpd_pgb1, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb2 = rcu_replace_pointer(sdev->vpd_pgb2, vpd_pgb2, lockdep_is_held(&sdev->inquiry_mutex)); vpd_pgb7 = rcu_replace_pointer(sdev->vpd_pgb7, vpd_pgb7, lockdep_is_held(&sdev->inquiry_mutex)); mutex_unlock(&sdev->inquiry_mutex); if (vpd_pg0) kfree_rcu(vpd_pg0, rcu); if (vpd_pg83) kfree_rcu(vpd_pg83, rcu); if (vpd_pg80) kfree_rcu(vpd_pg80, rcu); if (vpd_pg89) kfree_rcu(vpd_pg89, rcu); if (vpd_pgb0) kfree_rcu(vpd_pgb0, rcu); if (vpd_pgb1) kfree_rcu(vpd_pgb1, rcu); if (vpd_pgb2) kfree_rcu(vpd_pgb2, rcu); if (vpd_pgb7) kfree_rcu(vpd_pgb7, rcu); kfree(sdev->inquiry); kfree(sdev); if (parent) put_device(parent); } static struct class sdev_class = { .name = "scsi_device", .dev_release = scsi_device_cls_release, }; /* all probing is done in the 
individual ->probe routines */ static int scsi_bus_match(struct device *dev, const struct device_driver *gendrv) { struct scsi_device *sdp; if (dev->type != &scsi_dev_type) return 0; sdp = to_scsi_device(dev); if (sdp->no_uld_attach) return 0; return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0; } static int scsi_bus_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct scsi_device *sdev; if (dev->type != &scsi_dev_type) return 0; sdev = to_scsi_device(dev); add_uevent_var(env, "MODALIAS=" SCSI_DEVICE_MODALIAS_FMT, sdev->type); return 0; } const struct bus_type scsi_bus_type = { .name = "scsi", .match = scsi_bus_match, .uevent = scsi_bus_uevent, #ifdef CONFIG_PM .pm = &scsi_bus_pm_ops, #endif }; int scsi_sysfs_register(void) { int error; error = bus_register(&scsi_bus_type); if (!error) { error = class_register(&sdev_class); if (error) bus_unregister(&scsi_bus_type); } return error; } void scsi_sysfs_unregister(void) { class_unregister(&sdev_class); bus_unregister(&scsi_bus_type); } /* * sdev_show_function: macro to create an attr function that can be used to * show a non-bit field. */ #define sdev_show_function(field, format_string) \ static ssize_t \ sdev_show_##field (struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev; \ sdev = to_scsi_device(dev); \ return snprintf (buf, 20, format_string, sdev->field); \ } \ /* * sdev_rd_attr: macro to create a function and attribute variable for a * read only field. */ #define sdev_rd_attr(field, format_string) \ sdev_show_function(field, format_string) \ static DEVICE_ATTR(field, S_IRUGO, sdev_show_##field, NULL); /* * sdev_rw_attr: create a function and attribute variable for a * read/write field. */ #define sdev_rw_attr(field, format_string) \ sdev_show_function(field, format_string) \ \ static ssize_t \ sdev_store_##field (struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ struct scsi_device *sdev; \ sdev = to_scsi_device(dev); \ sscanf (buf, format_string, &sdev->field); \ return count; \ } \ static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, sdev_show_##field, sdev_store_##field); /* Currently we don't export bit fields, but we might in future, * so leave this code in */ #if 0 /* * sdev_rd_attr: create a function and attribute variable for a * read/write bit field. */ #define sdev_rw_attr_bit(field) \ sdev_show_function(field, "%d\n") \ \ static ssize_t \ sdev_store_##field (struct device *dev, struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int ret; \ struct scsi_device *sdev; \ ret = scsi_sdev_check_buf_bit(buf); \ if (ret >= 0) { \ sdev = to_scsi_device(dev); \ sdev->field = ret; \ ret = count; \ } \ return ret; \ } \ static DEVICE_ATTR(field, S_IRUGO | S_IWUSR, sdev_show_##field, sdev_store_##field); /* * scsi_sdev_check_buf_bit: return 0 if buf is "0", return 1 if buf is "1", * else return -EINVAL. */ static int scsi_sdev_check_buf_bit(const char *buf) { if ((buf[1] == '\0') || ((buf[1] == '\n') && (buf[2] == '\0'))) { if (buf[0] == '1') return 1; else if (buf[0] == '0') return 0; else return -EINVAL; } else return -EINVAL; } #endif /* * Create the actual show/store functions and data structures. 
*/ sdev_rd_attr (type, "%d\n"); sdev_rd_attr (scsi_level, "%d\n"); sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); sdev_rd_attr (cdl_supported, "%d\n"); static ssize_t sdev_show_device_busy(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", scsi_device_busy(sdev)); } static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL); static ssize_t sdev_show_device_blocked(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_blocked)); } static DEVICE_ATTR(device_blocked, S_IRUGO, sdev_show_device_blocked, NULL); /* * TODO: can we make these symlinks to the block layer ones? */ static ssize_t sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); } static ssize_t sdev_store_timeout (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev; int timeout; sdev = to_scsi_device(dev); sscanf (buf, "%d\n", &timeout); blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); return count; } static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); static ssize_t sdev_show_eh_timeout(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%u\n", sdev->eh_timeout / HZ); } static ssize_t sdev_store_eh_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev; unsigned int eh_timeout; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; sdev = to_scsi_device(dev); err = kstrtouint(buf, 10, &eh_timeout); if (err) return err; sdev->eh_timeout = eh_timeout * HZ; return count; } static DEVICE_ATTR(eh_timeout, S_IRUGO | S_IWUSR, sdev_show_eh_timeout, sdev_store_eh_timeout); static ssize_t store_rescan_field (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { scsi_rescan_device(to_scsi_device(dev)); return count; } static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field); static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; struct scsi_device *sdev = to_scsi_device(dev); /* * We need to try to get the module, to keep it from being removed * while the delete is in progress. */ if (scsi_device_get(sdev)) return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); /* * Concurrent writes into the "delete" sysfs attribute may trigger * concurrent calls to device_remove_file() and scsi_remove_device(). * device_remove_file() handles concurrent removal calls by * serializing these and by ignoring the second and later removal * attempts. Concurrent calls of scsi_remove_device() are * serialized. The second and later calls of scsi_remove_device() are * ignored because the first call of that function changes the device * state into SDEV_DEL.
*/ device_remove_file(dev, attr); scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); static ssize_t store_state_field(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int i, ret; struct scsi_device *sdev = to_scsi_device(dev); enum scsi_device_state state = 0; bool rescan_dev = false; for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { const int len = strlen(sdev_states[i].name); if (strncmp(sdev_states[i].name, buf, len) == 0 && buf[len] == '\n') { state = sdev_states[i].value; break; } } switch (state) { case SDEV_RUNNING: case SDEV_OFFLINE: break; default: return -EINVAL; } mutex_lock(&sdev->state_mutex); switch (sdev->sdev_state) { case SDEV_RUNNING: case SDEV_OFFLINE: break; default: mutex_unlock(&sdev->state_mutex); return -EINVAL; } if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) { ret = 0; } else { ret = scsi_device_set_state(sdev, state); if (ret == 0 && state == SDEV_RUNNING) rescan_dev = true; } mutex_unlock(&sdev->state_mutex); if (rescan_dev) { /* * If the device state changes to SDEV_RUNNING, we need to * run the queue to avoid I/O hang, and rescan the device * to revalidate it. Running the queue first is necessary * because another thread may be waiting inside * blk_mq_freeze_queue_wait() and because that call may be * waiting for pending I/O to finish. */ blk_mq_run_hw_queues(sdev->request_queue, true); scsi_rescan_device(sdev); } return ret == 0 ? count : -EINVAL; } static ssize_t show_state_field(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); const char *name = scsi_device_state_name(sdev->sdev_state); if (!name) return -EINVAL; return snprintf(buf, 20, "%s\n", name); } static DEVICE_ATTR(state, S_IRUGO | S_IWUSR, show_state_field, store_state_field); static ssize_t show_queue_type_field(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); const char *name = "none"; if (sdev->simple_tags) name = "simple"; return snprintf(buf, 20, "%s\n", name); } static ssize_t store_queue_type_field(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->tagged_supported) return -EINVAL; sdev_printk(KERN_INFO, sdev, "ignoring write to deprecated queue_type attribute"); return count; } static DEVICE_ATTR(queue_type, S_IRUGO | S_IWUSR, show_queue_type_field, store_queue_type_field); #define sdev_vpd_pg_attr(_page) \ static ssize_t \ show_vpd_##_page(struct file *filp, struct kobject *kobj, \ struct bin_attribute *bin_attr, \ char *buf, loff_t off, size_t count) \ { \ struct device *dev = kobj_to_dev(kobj); \ struct scsi_device *sdev = to_scsi_device(dev); \ struct scsi_vpd *vpd_page; \ int ret = -EINVAL; \ \ rcu_read_lock(); \ vpd_page = rcu_dereference(sdev->vpd_##_page); \ if (vpd_page) \ ret = memory_read_from_buffer(buf, count, &off, \ vpd_page->data, vpd_page->len); \ rcu_read_unlock(); \ return ret; \ } \ static struct bin_attribute dev_attr_vpd_##_page = { \ .attr = {.name = __stringify(vpd_##_page), .mode = S_IRUGO }, \ .size = 0, \ .read = show_vpd_##_page, \ }; sdev_vpd_pg_attr(pg83); sdev_vpd_pg_attr(pg80); sdev_vpd_pg_attr(pg89); sdev_vpd_pg_attr(pgb0); sdev_vpd_pg_attr(pgb1); sdev_vpd_pg_attr(pgb2); sdev_vpd_pg_attr(pgb7); sdev_vpd_pg_attr(pg0); static ssize_t show_inquiry(struct file *filep, struct 
kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->inquiry) return -EINVAL; return memory_read_from_buffer(buf, count, &off, sdev->inquiry, sdev->inquiry_len); } static struct bin_attribute dev_attr_inquiry = { .attr = { .name = "inquiry", .mode = S_IRUGO, }, .size = 0, .read = show_inquiry, }; static ssize_t show_iostat_counterbits(struct device *dev, struct device_attribute *attr, char *buf) { return snprintf(buf, 20, "%d\n", (int)sizeof(atomic_t) * 8); } static DEVICE_ATTR(iocounterbits, S_IRUGO, show_iostat_counterbits, NULL); #define show_sdev_iostat(field) \ static ssize_t \ show_iostat_##field(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ unsigned long long count = atomic_read(&sdev->field); \ return snprintf(buf, 20, "0x%llx\n", count); \ } \ static DEVICE_ATTR(field, S_IRUGO, show_iostat_##field, NULL) show_sdev_iostat(iorequest_cnt); show_sdev_iostat(iodone_cnt); show_sdev_iostat(ioerr_cnt); show_sdev_iostat(iotmo_cnt); static ssize_t sdev_show_modalias(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf (buf, 20, SCSI_DEVICE_MODALIAS_FMT "\n", sdev->type); } static DEVICE_ATTR(modalias, S_IRUGO, sdev_show_modalias, NULL); #define DECLARE_EVT_SHOW(name, Cap_name) \ static ssize_t \ sdev_show_evt_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ int val = test_bit(SDEV_EVT_##Cap_name, sdev->supported_events);\ return snprintf(buf, 20, "%d\n", val); \ } #define DECLARE_EVT_STORE(name, Cap_name) \ static ssize_t \ sdev_store_evt_##name(struct device *dev, struct device_attribute *attr,\ const char *buf, size_t count) \ { \ struct scsi_device *sdev = to_scsi_device(dev); \ int val = simple_strtoul(buf, NULL, 0); \ if (val == 0) \ clear_bit(SDEV_EVT_##Cap_name, sdev->supported_events); \ else if (val == 1) \ set_bit(SDEV_EVT_##Cap_name, sdev->supported_events); \ else \ return -EINVAL; \ return count; \ } #define DECLARE_EVT(name, Cap_name) \ DECLARE_EVT_SHOW(name, Cap_name) \ DECLARE_EVT_STORE(name, Cap_name) \ static DEVICE_ATTR(evt_##name, S_IRUGO, sdev_show_evt_##name, \ sdev_store_evt_##name); #define REF_EVT(name) &dev_attr_evt_##name.attr DECLARE_EVT(media_change, MEDIA_CHANGE) DECLARE_EVT(inquiry_change_reported, INQUIRY_CHANGE_REPORTED) DECLARE_EVT(capacity_change_reported, CAPACITY_CHANGE_REPORTED) DECLARE_EVT(soft_threshold_reached, SOFT_THRESHOLD_REACHED_REPORTED) DECLARE_EVT(mode_parameter_change_reported, MODE_PARAMETER_CHANGE_REPORTED) DECLARE_EVT(lun_change_reported, LUN_CHANGE_REPORTED) static ssize_t sdev_store_queue_depth(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int depth, retval; struct scsi_device *sdev = to_scsi_device(dev); const struct scsi_host_template *sht = sdev->host->hostt; if (!sht->change_queue_depth) return -EINVAL; depth = simple_strtoul(buf, NULL, 0); if (depth < 1 || depth > sdev->host->can_queue) return -EINVAL; retval = sht->change_queue_depth(sdev, depth); if (retval < 0) return retval; sdev->max_queue_depth = sdev->queue_depth; return count; } sdev_show_function(queue_depth, "%d\n"); static DEVICE_ATTR(queue_depth, S_IRUGO | S_IWUSR, sdev_show_queue_depth, sdev_store_queue_depth); static ssize_t sdev_show_wwid(struct device *dev, struct 
device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); ssize_t count; count = scsi_vpd_lun_id(sdev, buf, PAGE_SIZE); if (count > 0) { buf[count] = '\n'; count++; } return count; } static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL); #define BLIST_FLAG_NAME(name) \ [const_ilog2((__force __u64)BLIST_##name)] = #name static const char *const sdev_bflags_name[] = { #include "scsi_devinfo_tbl.c" }; #undef BLIST_FLAG_NAME static ssize_t sdev_show_blacklist(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); int i; ssize_t len = 0; for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) { const char *name = NULL; if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i))) continue; if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i]) name = sdev_bflags_name[i]; if (name) len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? " " : "", name); else len += scnprintf(buf + len, PAGE_SIZE - len, "%sINVALID_BIT(%d)", len ? " " : "", i); } if (len) len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } static DEVICE_ATTR(blacklist, S_IRUGO, sdev_show_blacklist, NULL); #ifdef CONFIG_SCSI_DH static ssize_t sdev_show_dh_state(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->handler) return snprintf(buf, 20, "detached\n"); return snprintf(buf, 20, "%s\n", sdev->handler->name); } static ssize_t sdev_store_dh_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); int err = -EINVAL; if (sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL) return -ENODEV; if (!sdev->handler) { /* * Attach to a device handler */ err = scsi_dh_attach(sdev->request_queue, buf); } else if (!strncmp(buf, "activate", 8)) { /* * Activate a device handler */ if (sdev->handler->activate) err = sdev->handler->activate(sdev, NULL, NULL); else err = 0; } else if (!strncmp(buf, "detach", 6)) { /* * Detach from a device handler */ sdev_printk(KERN_WARNING, sdev, "can't detach handler %s.\n", sdev->handler->name); err = -EINVAL; } return err < 0 ? err : count; } static DEVICE_ATTR(dh_state, S_IRUGO | S_IWUSR, sdev_show_dh_state, sdev_store_dh_state); static ssize_t sdev_show_access_state(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); unsigned char access_state; const char *access_state_name; if (!sdev->handler) return -EINVAL; access_state = (sdev->access_state & SCSI_ACCESS_STATE_MASK); access_state_name = scsi_access_state_name(access_state); return sprintf(buf, "%s\n", access_state_name ? 
access_state_name : "unknown"); } static DEVICE_ATTR(access_state, S_IRUGO, sdev_show_access_state, NULL); static ssize_t sdev_show_preferred_path(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); if (!sdev->handler) return -EINVAL; if (sdev->access_state & SCSI_ACCESS_STATE_PREFERRED) return sprintf(buf, "1\n"); else return sprintf(buf, "0\n"); } static DEVICE_ATTR(preferred_path, S_IRUGO, sdev_show_preferred_path, NULL); #endif static ssize_t sdev_show_queue_ramp_up_period(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); return snprintf(buf, 20, "%u\n", jiffies_to_msecs(sdev->queue_ramp_up_period)); } static ssize_t sdev_store_queue_ramp_up_period(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct scsi_device *sdev = to_scsi_device(dev); unsigned int period; if (kstrtouint(buf, 10, &period)) return -EINVAL; sdev->queue_ramp_up_period = msecs_to_jiffies(period); return count; } static DEVICE_ATTR(queue_ramp_up_period, S_IRUGO | S_IWUSR, sdev_show_queue_ramp_up_period, sdev_store_queue_ramp_up_period); static ssize_t sdev_show_cdl_enable(struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev = to_scsi_device(dev); return sysfs_emit(buf, "%d\n", (int)sdev->cdl_enable); } static ssize_t sdev_store_cdl_enable(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ret; bool v; if (kstrtobool(buf, &v)) return -EINVAL; ret = scsi_cdl_enable(to_scsi_device(dev), v); if (ret) return ret; return count; } static DEVICE_ATTR(cdl_enable, S_IRUGO | S_IWUSR, sdev_show_cdl_enable, sdev_store_cdl_enable); static umode_t scsi_sdev_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (attr == &dev_attr_queue_depth.attr && !sdev->host->hostt->change_queue_depth) return S_IRUGO; if (attr == &dev_attr_queue_ramp_up_period.attr && !sdev->host->hostt->change_queue_depth) return 0; return attr->mode; } static umode_t scsi_sdev_bin_attr_is_visible(struct kobject *kobj, struct bin_attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct scsi_device *sdev = to_scsi_device(dev); if (attr == &dev_attr_vpd_pg0 && !sdev->vpd_pg0) return 0; if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80) return 0; if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83) return 0; if (attr == &dev_attr_vpd_pg89 && !sdev->vpd_pg89) return 0; if (attr == &dev_attr_vpd_pgb0 && !sdev->vpd_pgb0) return 0; if (attr == &dev_attr_vpd_pgb1 && !sdev->vpd_pgb1) return 0; if (attr == &dev_attr_vpd_pgb2 && !sdev->vpd_pgb2) return 0; if (attr == &dev_attr_vpd_pgb7 && !sdev->vpd_pgb7) return 0; return S_IRUGO; } /* Default template for device attributes. 
May NOT be modified */ static struct attribute *scsi_sdev_attrs[] = { &dev_attr_device_blocked.attr, &dev_attr_type.attr, &dev_attr_scsi_level.attr, &dev_attr_device_busy.attr, &dev_attr_vendor.attr, &dev_attr_model.attr, &dev_attr_rev.attr, &dev_attr_rescan.attr, &dev_attr_delete.attr, &dev_attr_state.attr, &dev_attr_timeout.attr, &dev_attr_eh_timeout.attr, &dev_attr_iocounterbits.attr, &dev_attr_iorequest_cnt.attr, &dev_attr_iodone_cnt.attr, &dev_attr_ioerr_cnt.attr, &dev_attr_iotmo_cnt.attr, &dev_attr_modalias.attr, &dev_attr_queue_depth.attr, &dev_attr_queue_type.attr, &dev_attr_wwid.attr, &dev_attr_blacklist.attr, #ifdef CONFIG_SCSI_DH &dev_attr_dh_state.attr, &dev_attr_access_state.attr, &dev_attr_preferred_path.attr, #endif &dev_attr_queue_ramp_up_period.attr, &dev_attr_cdl_supported.attr, &dev_attr_cdl_enable.attr, REF_EVT(media_change), REF_EVT(inquiry_change_reported), REF_EVT(capacity_change_reported), REF_EVT(soft_threshold_reached), REF_EVT(mode_parameter_change_reported), REF_EVT(lun_change_reported), NULL }; static struct bin_attribute *scsi_sdev_bin_attrs[] = { &dev_attr_vpd_pg0, &dev_attr_vpd_pg83, &dev_attr_vpd_pg80, &dev_attr_vpd_pg89, &dev_attr_vpd_pgb0, &dev_attr_vpd_pgb1, &dev_attr_vpd_pgb2, &dev_attr_vpd_pgb7, &dev_attr_inquiry, NULL }; static struct attribute_group scsi_sdev_attr_group = { .attrs = scsi_sdev_attrs, .bin_attrs = scsi_sdev_bin_attrs, .is_visible = scsi_sdev_attr_is_visible, .is_bin_visible = scsi_sdev_bin_attr_is_visible, }; static const struct attribute_group *scsi_sdev_attr_groups[] = { &scsi_sdev_attr_group, NULL }; static int scsi_target_add(struct scsi_target *starget) { int error; if (starget->state != STARGET_CREATED) return 0; error = device_add(&starget->dev); if (error) { dev_err(&starget->dev, "target device_add failed, error %d\n", error); return error; } transport_add_device(&starget->dev); starget->state = STARGET_RUNNING; pm_runtime_set_active(&starget->dev); pm_runtime_enable(&starget->dev); device_enable_async_suspend(&starget->dev); return 0; } /** * scsi_sysfs_add_sdev - add scsi device to sysfs * @sdev: scsi_device to add * * Return value: * 0 on Success / non-zero on Failure **/ int scsi_sysfs_add_sdev(struct scsi_device *sdev) { int error; struct scsi_target *starget = sdev->sdev_target; error = scsi_target_add(starget); if (error) return error; transport_configure_device(&starget->dev); device_enable_async_suspend(&sdev->sdev_gendev); scsi_autopm_get_target(starget); pm_runtime_set_active(&sdev->sdev_gendev); if (!sdev->rpm_autosuspend) pm_runtime_forbid(&sdev->sdev_gendev); pm_runtime_enable(&sdev->sdev_gendev); scsi_autopm_put_target(starget); scsi_autopm_get_device(sdev); scsi_dh_add_device(sdev); error = device_add(&sdev->sdev_gendev); if (error) { sdev_printk(KERN_INFO, sdev, "failed to add device: %d\n", error); return error; } device_enable_async_suspend(&sdev->sdev_dev); error = device_add(&sdev->sdev_dev); if (error) { sdev_printk(KERN_INFO, sdev, "failed to add class device: %d\n", error); device_del(&sdev->sdev_gendev); return error; } transport_add_device(&sdev->sdev_gendev); sdev->is_visible = 1; if (IS_ENABLED(CONFIG_BLK_DEV_BSG)) { sdev->bsg_dev = scsi_bsg_register_queue(sdev); if (IS_ERR(sdev->bsg_dev)) { error = PTR_ERR(sdev->bsg_dev); sdev_printk(KERN_INFO, sdev, "Failed to register bsg queue, errno=%d\n", error); sdev->bsg_dev = NULL; } } scsi_autopm_put_device(sdev); return error; } void __scsi_remove_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int res; /* * This cleanup path 
is not reentrant and while it is impossible * to get a new reference with scsi_device_get() someone can still * hold a previously acquired one. */ if (sdev->sdev_state == SDEV_DEL) return; if (sdev->is_visible) { /* * If scsi_internal_target_block() is running concurrently, * wait until it has finished before changing the device state. */ mutex_lock(&sdev->state_mutex); /* * If blocked, we go straight to DEL and restart the queue so * any commands issued during driver shutdown (like sync * cache) are errored immediately. */ res = scsi_device_set_state(sdev, SDEV_CANCEL); if (res != 0) { res = scsi_device_set_state(sdev, SDEV_DEL); if (res == 0) scsi_start_queue(sdev); } mutex_unlock(&sdev->state_mutex); if (res != 0) return; if (IS_ENABLED(CONFIG_BLK_DEV_BSG) && sdev->bsg_dev) bsg_unregister_queue(sdev->bsg_dev); device_unregister(&sdev->sdev_dev); transport_remove_device(dev); device_del(dev); } else put_device(&sdev->sdev_dev); /* * Stop accepting new requests and wait until all queuecommand() and * scsi_run_queue() invocations have finished before tearing down the * device. */ mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_DEL); mutex_unlock(&sdev->state_mutex); blk_mq_destroy_queue(sdev->request_queue); kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); cancel_work_sync(&sdev->requeue_work); if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); /* * Paired with the kref_get() in scsi_sysfs_initialize(). We have * removed sysfs visibility from the device, so make the target * invisible if this was the last device underneath it. */ scsi_target_reap(scsi_target(sdev)); put_device(dev); } /** * scsi_remove_device - unregister a device from the scsi bus * @sdev: scsi_device to unregister **/ void scsi_remove_device(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; mutex_lock(&shost->scan_mutex); __scsi_remove_device(sdev); mutex_unlock(&shost->scan_mutex); } EXPORT_SYMBOL(scsi_remove_device); static void __scsi_remove_target(struct scsi_target *starget) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; struct scsi_device *sdev; spin_lock_irqsave(shost->host_lock, flags); restart: list_for_each_entry(sdev, &shost->__devices, siblings) { /* * We cannot call scsi_device_get() here, as * we might've been called from rmmod() causing * scsi_device_get() to fail the module_is_live() * check. */ if (sdev->channel != starget->channel || sdev->id != starget->id) continue; if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL || !get_device(&sdev->sdev_gendev)) continue; spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); put_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_remove_target - try to remove a target and all its devices * @dev: generic starget or parent of generic stargets to be removed * * Note: This is slightly racy. It is possible that if the user * requests the addition of another device then the target won't be * removed. 
*/ void scsi_remove_target(struct device *dev) { struct Scsi_Host *shost = dev_to_shost(dev->parent); struct scsi_target *starget; unsigned long flags; restart: spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->state == STARGET_DEL || starget->state == STARGET_REMOVE || starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { kref_get(&starget->reap_ref); if (starget->state == STARGET_CREATED) starget->state = STARGET_CREATED_REMOVE; else starget->state = STARGET_REMOVE; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_target(starget); scsi_target_reap(starget); goto restart; } } spin_unlock_irqrestore(shost->host_lock, flags); } EXPORT_SYMBOL(scsi_remove_target); int __scsi_register_driver(struct device_driver *drv, struct module *owner) { drv->bus = &scsi_bus_type; drv->owner = owner; return driver_register(drv); } EXPORT_SYMBOL(__scsi_register_driver); int scsi_register_interface(struct class_interface *intf) { intf->class = &sdev_class; return class_interface_register(intf); } EXPORT_SYMBOL(scsi_register_interface); /** * scsi_sysfs_add_host - add scsi host to subsystem * @shost: scsi host struct to add to subsystem **/ int scsi_sysfs_add_host(struct Scsi_Host *shost) { transport_register_device(&shost->shost_gendev); transport_configure_device(&shost->shost_gendev); return 0; } static const struct device_type scsi_dev_type = { .name = "scsi_device", .release = scsi_device_dev_release, .groups = scsi_sdev_attr_groups, }; void scsi_sysfs_device_initialize(struct scsi_device *sdev) { unsigned long flags; struct Scsi_Host *shost = sdev->host; const struct scsi_host_template *hostt = shost->hostt; struct scsi_target *starget = sdev->sdev_target; device_initialize(&sdev->sdev_gendev); sdev->sdev_gendev.bus = &scsi_bus_type; sdev->sdev_gendev.type = &scsi_dev_type; scsi_enable_async_suspend(&sdev->sdev_gendev); dev_set_name(&sdev->sdev_gendev, "%d:%d:%d:%llu", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); sdev->sdev_gendev.groups = hostt->sdev_groups; device_initialize(&sdev->sdev_dev); sdev->sdev_dev.parent = get_device(&sdev->sdev_gendev); sdev->sdev_dev.class = &sdev_class; dev_set_name(&sdev->sdev_dev, "%d:%d:%d:%llu", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); /* * Get a default scsi_level from the target (derived from sibling * devices). This is the best we can do for guessing how to set * sdev->lun_in_cdb for the initial INQUIRY command. For LUN 0 the * setting doesn't matter, because all the bits are zero anyway. * But it does matter for higher LUNs. */ sdev->scsi_level = starget->scsi_level; if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN && !shost->no_scsi2_lun_in_cdb) sdev->lun_in_cdb = 1; transport_setup_device(&sdev->sdev_gendev); spin_lock_irqsave(shost->host_lock, flags); list_add_tail(&sdev->same_target_siblings, &starget->devices); list_add_tail(&sdev->siblings, &shost->__devices); spin_unlock_irqrestore(shost->host_lock, flags); /* * device can now only be removed via __scsi_remove_device() so hold * the target. 
Target will be held in CREATED state until something * beneath it becomes visible (in which case it moves to RUNNING) */ kref_get(&starget->reap_ref); } int scsi_is_sdev_device(const struct device *dev) { return dev->type == &scsi_dev_type; } EXPORT_SYMBOL(scsi_is_sdev_device); /* A blank transport template that is used in drivers that don't * yet implement Transport Attributes */ struct scsi_transport_template blank_transport_template = { { { {NULL, }, }, }, };
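/*
 * Editor's note (illustration only, not part of scsi_sysfs.c): most of the
 * sysfs attribute boilerplate above is generated by token-pasting macros such
 * as sdev_show_function()/sdev_rd_attr(); each macro use stamps out one
 * sdev_show_<field>() helper that snprintf()s a scsi_device member into the
 * caller's buffer. The user-space sketch below demonstrates the same macro
 * pattern with a toy struct; struct toy_device, show_type() and friends are
 * invented names for this example and do not exist in the SCSI midlayer.
 */
#include <stdio.h>

struct toy_device {
	int type;
	int scsi_level;
};

/* One macro use == one generated show_<field>() accessor, mirroring how
 * sdev_rd_attr(type, "%d\n") generates sdev_show_type() above. */
#define toy_show_function(field, format_string)					\
static int show_##field(const struct toy_device *dev, char *buf, int len)	\
{										\
	return snprintf(buf, len, format_string, dev->field);			\
}

toy_show_function(type, "%d\n")
toy_show_function(scsi_level, "%d\n")

int main(void)
{
	struct toy_device dev = { .type = 0, .scsi_level = 7 };
	char buf[20];

	show_type(&dev, buf, sizeof(buf));
	fputs(buf, stdout);
	show_scsi_level(&dev, buf, sizeof(buf));
	fputs(buf, stdout);
	return 0;
}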
2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_FS_H
#define _LINUX_FS_H

#include <linux/linkage.h>
#include <linux/wait_bit.h>
#include <linux/kdev_t.h>
#include <linux/dcache.h>
#include
<linux/path.h> #include <linux/stat.h> #include <linux/cache.h> #include <linux/list.h> #include <linux/list_lru.h> #include <linux/llist.h> #include <linux/radix-tree.h> #include <linux/xarray.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/pid.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/mm_types.h> #include <linux/capability.h> #include <linux/semaphore.h> #include <linux/fcntl.h> #include <linux/rculist_bl.h> #include <linux/atomic.h> #include <linux/shrinker.h> #include <linux/migrate_mode.h> #include <linux/uidgid.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/workqueue.h> #include <linux/delayed_call.h> #include <linux/uuid.h> #include <linux/errseq.h> #include <linux/ioprio.h> #include <linux/fs_types.h> #include <linux/build_bug.h> #include <linux/stddef.h> #include <linux/mount.h> #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> #include <linux/maple_tree.h> #include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> struct backing_dev_info; struct bdi_writeback; struct bio; struct io_comp_batch; struct export_operations; struct fiemap_extent_info; struct hd_geometry; struct iovec; struct kiocb; struct kobject; struct pipe_inode_info; struct poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; struct iov_iter; struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; struct fsnotify_mark_connector; struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; struct fileattr; struct iomap_ops; extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; typedef __kernel_rwf_t rwf_t; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); #define MAY_EXEC 0x00000001 #define MAY_WRITE 0x00000002 #define MAY_READ 0x00000004 #define MAY_APPEND 0x00000008 #define MAY_ACCESS 0x00000010 #define MAY_OPEN 0x00000020 #define MAY_CHDIR 0x00000040 /* called from RCU mode, don't block */ #define MAY_NOT_BLOCK 0x00000080 /* * flags in file.f_mode. 
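 *
 * (Illustrative sketch, not part of the original header: callers normally
 * test these bits before operating on a struct file, e.g.
 *
 *	if (!(file->f_mode & FMODE_READ))
 *		return -EBADF;
 *
 * which is the check vfs_read() performs before reading.)
 *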
Note that FMODE_READ and FMODE_WRITE must correspond * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open() */ /* file is open for reading */ #define FMODE_READ ((__force fmode_t)(1 << 0)) /* file is open for writing */ #define FMODE_WRITE ((__force fmode_t)(1 << 1)) /* file is seekable */ #define FMODE_LSEEK ((__force fmode_t)(1 << 2)) /* file can be accessed using pread */ #define FMODE_PREAD ((__force fmode_t)(1 << 3)) /* file can be accessed using pwrite */ #define FMODE_PWRITE ((__force fmode_t)(1 << 4)) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)(1 << 5)) /* File writes are restricted (block device specific) */ #define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) /* File supports atomic writes */ #define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7)) /* FMODE_* bit 8 */ /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) /* 64bit hashes as llseek() offset (for directories) */ #define FMODE_64BITHASH ((__force fmode_t)(1 << 10)) /* * Don't update ctime and mtime. * * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ #define FMODE_NOCMTIME ((__force fmode_t)(1 << 11)) /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) /* File is huge (eg. /dev/mem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15)) /* Write access to underlying fs */ #define FMODE_WRITER ((__force fmode_t)(1 << 16)) /* Has read method(s) */ #define FMODE_CAN_READ ((__force fmode_t)(1 << 17)) /* Has write method(s) */ #define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18)) #define FMODE_OPENED ((__force fmode_t)(1 << 19)) #define FMODE_CREATED ((__force fmode_t)(1 << 20)) /* File is stream-like */ #define FMODE_STREAM ((__force fmode_t)(1 << 21)) /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22)) #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) /* FMODE_* bit 24 */ /* File is embedded in backing_file object */ #define FMODE_BACKING ((__force fmode_t)(1 << 25)) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) /* File represents mount that needs unmounting */ #define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28)) /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* * Attribute flags. These should be or-ed together to figure out what * has been changed! 
*/ #define ATTR_MODE (1 << 0) #define ATTR_UID (1 << 1) #define ATTR_GID (1 << 2) #define ATTR_SIZE (1 << 3) #define ATTR_ATIME (1 << 4) #define ATTR_MTIME (1 << 5) #define ATTR_CTIME (1 << 6) #define ATTR_ATIME_SET (1 << 7) #define ATTR_MTIME_SET (1 << 8) #define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ #define ATTR_KILL_SUID (1 << 11) #define ATTR_KILL_SGID (1 << 12) #define ATTR_FILE (1 << 13) #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) #define ATTR_TOUCH (1 << 17) #define ATTR_DELEG (1 << 18) /* Delegated attrs. Don't break write delegations */ /* * Whiteout is represented by a char device. The following constants define the * mode and device number to use. */ #define WHITEOUT_MODE 0 #define WHITEOUT_DEV 0 /* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares * about. Basically, these are the attributes that the VFS layer can * request to change from the FS layer. * * Derek Atkins <warlord@MIT.EDU> 94-10-20 */ struct iattr { unsigned int ia_valid; umode_t ia_mode; /* * The two anonymous unions wrap structures with the same member. * * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which * are a dedicated type requiring the filesystem to use the dedicated * helpers. Other filesystem can continue to use ia_{g,u}id until they * have been ported. * * They always contain the same value. In other words FS_ALLOW_IDMAP * pass down the same value on idmapped mounts as they would on regular * mounts. */ union { kuid_t ia_uid; vfsuid_t ia_vfsuid; }; union { kgid_t ia_gid; vfsgid_t ia_vfsgid; }; loff_t ia_size; struct timespec64 ia_atime; struct timespec64 ia_mtime; struct timespec64 ia_ctime; /* * Not an attribute, but an auxiliary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). */ struct file *ia_file; }; /* * Includes for diskquotas. */ #include <linux/quota.h> /* * Maximum number of layers of fs stack. Needs to be limited to * prevent kernel stack overflow */ #define FILESYSTEM_MAX_STACK_DEPTH 2 /** * enum positive_aop_returns - aop return codes with specific semantics * * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has * completed, that the page is still locked, and * should be considered active. The VM uses this hint * to return the page to the active list -- it won't * be a candidate for writeback again in the near * future. Other callers must be careful to unlock * the page if they get this return. Returned by * writepage(); * * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has * unlocked it and the page might have been truncated. * The caller should back up to acquiring a new page and * trying again. The aop will be taking reasonable * precautions not to livelock. If the caller held a page * reference, it should drop it before retrying. Returned * by read_folio(). * * address_space_operation functions return these large constants to indicate * special semantics to the caller. These are much larger than the bytes in a * page to allow for functions that return the number of bytes operated on in a * given page. */ enum positive_aop_returns { AOP_WRITEPAGE_ACTIVATE = 0x80000, AOP_TRUNCATED_PAGE = 0x80001, }; /* * oh the beauties of C type declarations. 
*/ struct page; struct address_space; struct writeback_control; struct readahead_control; /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) #define IOCB_DIRECT (1 << 17) #define IOCB_WRITE (1 << 18) /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) /* can use bio alloc cache */ #define IOCB_ALLOC_CACHE (1 << 21) /* * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the * iocb completion can be passed back to the owner for execution from a safe * context rather than needing to be punted through a workqueue. If this * flag is set, the bio completion handling may set iocb->dio_complete to a * handler function and iocb->private to context information for that handler. * The issuer should call the handler with that context information from task * context to complete the processing of the iocb. Note that while this * provides a task context for the dio_complete() callback, it should only be * used on the completion side for non-IO generating completions. It's fine to * call blocking functions from this callback, but they should not wait for * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. */ #define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ { IOCB_HIPRI, "HIPRI" }, \ { IOCB_DSYNC, "DSYNC" }, \ { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ { IOCB_ATOMIC, "ATOMIC"}, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } struct kiocb { struct file *ki_filp; loff_t ki_pos; void (*ki_complete)(struct kiocb *iocb, long ret); void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ union { /* * Only used for async buffered reads, where it denotes the * page waitqueue associated with completing the read. Valid * IFF IOCB_WAITQ is set. */ struct wait_page_queue *ki_waitq; /* * Can be used for O_DIRECT IO, where the completion handling * is punted back to the issuer of the IO. May only be set * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer * must then check for presence of this handler when ki_complete * is invoked. The data passed in to this handler must be * assigned to ->private when dio_complete is assigned. */ ssize_t (*dio_complete)(void *data); }; }; static inline bool is_sync_kiocb(struct kiocb *kiocb) { return kiocb->ki_complete == NULL; } struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); /* Mark a folio dirty. 
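 * (Illustration, not from the original header: filesystems with no special
 * bookkeeping typically wire this to a generic helper such as
 *
 *	.dirty_folio	= filemap_dirty_folio,
 *
 * or block_dirty_folio in their address_space_operations.)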
Return true if this dirtied it */ bool (*dirty_folio)(struct address_space *, struct folio *); void (*readahead)(struct readahead_control *); int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t offset, size_t len); bool (*release_folio)(struct folio *, gfp_t); void (*free_folio)(struct folio *folio); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); /* * migrate the contents of a folio to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. */ int (*migrate_folio)(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); int (*launder_folio)(struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb); int (*error_remove_folio)(struct address_space *, struct folio *); /* swapfile support */ int (*swap_activate)(struct swap_info_struct *sis, struct file *file, sector_t *span); void (*swap_deactivate)(struct file *file); int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter); }; extern const struct address_space_operations empty_aops; /** * struct address_space - Contents of a cacheable, mappable object. * @host: Owner, either the inode or the block_device. * @i_pages: Cached pages. * @invalidate_lock: Guards coherency between page cache contents and * file offset->disk block mappings in the filesystem during invalidates. * It is also used to block modification of page cache contents through * memory mappings. * @gfp_mask: Memory allocation flags to use for allocating pages. * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings. * @nr_thps: Number of THPs in the pagecache (non-shmem only). * @i_mmap: Tree of private and shared mappings. * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. * @nrpages: Number of page entries, protected by the i_pages lock. * @writeback_index: Writeback starts here. * @a_ops: Methods. * @flags: Error bits and flags (AS_*). * @wb_err: The most recent error which has occurred. * @i_private_lock: For use by the owner of the address_space. * @i_private_list: For use by the owner of the address_space. * @i_private_data: For use by the owner of the address_space. */ struct address_space { struct inode *host; struct xarray i_pages; struct rw_semaphore invalidate_lock; gfp_t gfp_mask; atomic_t i_mmap_writable; #ifdef CONFIG_READ_ONLY_THP_FOR_FS /* number of thp, only for non-shmem files */ atomic_t nr_thps; #endif struct rb_root_cached i_mmap; unsigned long nrpages; pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ /* XArray tags, for tagging dirty and writeback pages in the pagecache. 
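 *
 * (Illustrative use, not part of the original header: writeback walks the
 * mapping by tag rather than scanning every folio, e.g.
 *
 *	nr = filemap_get_folios_tag(mapping, &index, end,
 *				    PAGECACHE_TAG_DIRTY, &fbatch);
 *
 * gathers the next batch of dirty folios to write out.)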
*/ #define PAGECACHE_TAG_DIRTY XA_MARK_0 #define PAGECACHE_TAG_WRITEBACK XA_MARK_1 #define PAGECACHE_TAG_TOWRITE XA_MARK_2 /* * Returns true if any of the pages in the mapping are marked with the tag. */ static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) { return xa_marked(&mapping->i_pages, tag); } static inline void i_mmap_lock_write(struct address_space *mapping) { down_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_write(struct address_space *mapping) { return down_write_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_write(struct address_space *mapping) { up_write(&mapping->i_mmap_rwsem); } static inline int i_mmap_trylock_read(struct address_space *mapping) { return down_read_trylock(&mapping->i_mmap_rwsem); } static inline void i_mmap_lock_read(struct address_space *mapping) { down_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_unlock_read(struct address_space *mapping) { up_read(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_locked(struct address_space *mapping) { lockdep_assert_held(&mapping->i_mmap_rwsem); } static inline void i_mmap_assert_write_locked(struct address_space *mapping) { lockdep_assert_held_write(&mapping->i_mmap_rwsem); } /* * Might pages of this file be mapped into userspace? */ static inline int mapping_mapped(struct address_space *mapping) { return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* * Might pages of this file have been modified in userspace? * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * * If i_mmap_writable is negative, no new writable mappings are allowed. You * can only deny writable mappings, if none exists right now. */ static inline int mapping_writably_mapped(struct address_space *mapping) { return atomic_read(&mapping->i_mmap_writable) > 0; } static inline int mapping_map_writable(struct address_space *mapping) { return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? 0 : -EPERM; } static inline void mapping_unmap_writable(struct address_space *mapping) { atomic_dec(&mapping->i_mmap_writable); } static inline int mapping_deny_writable(struct address_space *mapping) { return atomic_dec_unless_positive(&mapping->i_mmap_writable) ? 0 : -EBUSY; } static inline void mapping_allow_writable(struct address_space *mapping) { atomic_inc(&mapping->i_mmap_writable); } /* * Use sequence counter to get consistent i_size on 32-bit processors. */ #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include <linux/seqlock.h> #define __NEED_I_SIZE_ORDERED #define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount) #else #define i_size_ordered_init(inode) do { } while (0) #endif struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) /* * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to * cache the ACL. This also means that ->get_inode_acl() can be called in RCU * mode with the LOOKUP_RCU flag. 
*/ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * uncached_acl_sentinel(struct task_struct *task) { return (void *)task + 1; } static inline bool is_uncached_acl(struct posix_acl *acl) { return (long)acl & 1; } #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning * of the 'struct inode' */ struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void *i_security; #endif /* Stat data, not accessed from path walking */ unsigned long i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: * * (set|clear|inc|drop)_nlink * inode_(inc|dec)_link_count */ union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; time64_t i_atime_sec; time64_t i_mtime_sec; time64_t i_ctime_sec; u32 i_atime_nsec; u32 i_mtime_nsec; u32 i_ctime_nsec; u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif /* Misc */ unsigned long i_state; struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ /* foreign inode detection, see wbc_detach_inode() */ int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; /* struct files open RO */ #endif union { const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; }; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ /* 32-bit hole reserved for expanding i_fsnotify_mask */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_inode_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void *i_private; /* fs or device private pointer */ } __randomize_layout; struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); static inline unsigned int i_blocksize(const struct inode *node) { return (1 << node->i_blkbits); } static inline int inode_unhashed(struct inode *inode) { return hlist_unhashed(&inode->i_hash); } /* * __mark_inode_dirty expects inodes to be hashed. 
Since we don't * want special inodes in the fileset inode space, we make them * appear hashed, but do not put on any lists. hlist_del() * will work fine and require no locking. */ static inline void inode_fake_hash(struct inode *inode) { hlist_add_fake(&inode->i_hash); } /* * inode->i_mutex nesting subclasses for the lock validator: * * 0: the object of the current VFS operation * 1: parent * 2: child/target * 3: xattr * 4: second non-directory * 5: second parent (when locking independent directories in rename) * * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { I_MUTEX_NORMAL, I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, I_MUTEX_NONDIR2, I_MUTEX_PARENT2, }; static inline void inode_lock(struct inode *inode) { down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); } static inline void inode_lock_shared(struct inode *inode) { down_read(&inode->i_rwsem); } static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { return down_write_trylock(&inode->i_rwsem); } static inline int inode_trylock_shared(struct inode *inode) { return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { down_write_nested(&inode->i_rwsem, subclass); } static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass) { down_read_nested(&inode->i_rwsem, subclass); } static inline void filemap_invalidate_lock(struct address_space *mapping) { down_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock(struct address_space *mapping) { up_write(&mapping->invalidate_lock); } static inline void filemap_invalidate_lock_shared(struct address_space *mapping) { down_read(&mapping->invalidate_lock); } static inline int filemap_invalidate_trylock_shared( struct address_space *mapping) { return down_read_trylock(&mapping->invalidate_lock); } static inline void filemap_invalidate_unlock_shared( struct address_space *mapping) { up_read(&mapping->invalidate_lock); } void lock_two_nondirectories(struct inode *, struct inode*); void unlock_two_nondirectories(struct inode *, struct inode*); void filemap_invalidate_lock_two(struct address_space *mapping1, struct address_space *mapping2); void filemap_invalidate_unlock_two(struct address_space *mapping1, struct address_space *mapping2); /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic * with respect to the local cpu (unlike with preempt disabled), * but they don't need to be atomic with respect to other cpus like in * true SMP (so they need either to either locally disable irq around * the read or for example on x86 they can be still implemented as a * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. 
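 *
 * (Illustrative pairing, not part of the original header: writers update the
 * size under the inode lock while readers may sample it locklessly, e.g.
 *
 *	inode_lock(inode);
 *	i_size_write(inode, new_size);
 *	inode_unlock(inode);
 *
 * on the write side, and a plain i_size_read(inode) on the read side.)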
*/ static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; unsigned int seq; do { seq = read_seqcount_begin(&inode->i_size_seqcount); i_size = inode->i_size; } while (read_seqcount_retry(&inode->i_size_seqcount, seq)); return i_size; #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) loff_t i_size; preempt_disable(); i_size = inode->i_size; preempt_enable(); return i_size; #else /* Pairs with smp_store_release() in i_size_write() */ return smp_load_acquire(&inode->i_size); #endif } /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. */ static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) preempt_disable(); write_seqcount_begin(&inode->i_size_seqcount); inode->i_size = i_size; write_seqcount_end(&inode->i_size_seqcount); preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); inode->i_size = i_size; preempt_enable(); #else /* * Pairs with smp_load_acquire() in i_size_read() to ensure * changes related to inode size (such as page contents) are * visible before we see the changed inode size. */ smp_store_release(&inode->i_size, i_size); #endif } static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); } static inline unsigned imajor(const struct inode *inode) { return MAJOR(inode->i_rdev); } struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ kuid_t uid, euid; /* uid/euid of process setting the owner */ int signum; /* posix.1b rt signal to be delivered on IO */ }; /** * struct file_ra_state - Track a file's readahead state. * @start: Where the most recent readahead started. * @size: Number of pages read in the most recent readahead. * @async_size: Numer of pages that were/are not needed immediately * and so were/are genuinely "ahead". Start next readahead when * the first of these pages is accessed. * @ra_pages: Maximum size of a readahead request, copied from the bdi. * @mmap_miss: How many mmap accesses missed in the page cache. * @prev_pos: The last byte in the most recent read request. * * When this structure is passed to ->readahead(), the "most recent" * readahead means the current readahead. */ struct file_ra_state { pgoff_t start; unsigned int size; unsigned int async_size; unsigned int ra_pages; unsigned int mmap_miss; loff_t prev_pos; }; /* * Check if @index falls in the readahead windows. */ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) { return (index >= ra->start && index < ra->start + ra->size); } /* * f_{lock,count,pos_lock} members can be highly contended and share * the same cacheline. f_{lock,mode} are very frequently used together * and so share the same cacheline as well. The read-mostly * f_{path,inode,op} are kept on a separate cacheline. */ struct file { union { /* fput() uses task work when closing and freeing file (default). */ struct callback_head f_task_work; /* fput() must use workqueue (most kernel threads). */ struct llist_node f_llist; unsigned int f_iocb_flags; }; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. 
*/ spinlock_t f_lock; fmode_t f_mode; atomic_long_t f_count; struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ unsigned char f_handle[] __counted_by(handle_bytes); }; static inline struct file *get_file(struct file *f) { long prior = atomic_long_fetch_inc_relaxed(&f->f_count); WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); #define file_count(x) atomic_long_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 #define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif /* legacy typedef, should eventually be removed */ typedef void *fl_owner_t; struct file_lock; struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX #define OFFSET_MAX type_max(loff_t) #define OFFT_OFFSET_MAX type_max(off_t) #endif extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) { return f->f_inode; } /* * file_dentry() is a relic from the days that overlayfs was using files with a * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. * In those days, file_dentry() was needed to get the underlying fs dentry that * matches f_inode. * Files with "fake" path should not exist nowadays, so use an assertion to make * sure that file_dentry() was not papering over filesystem bugs. 
*/ static inline struct dentry *file_dentry(const struct file *file) { struct dentry *dentry = file->f_path.dentry; WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); return dentry; } struct fasync_struct { rwlock_t fa_lock; int magic; int fa_fd; struct fasync_struct *fa_next; /* singly linked list */ struct file *fa_file; struct rcu_head fa_rcu; }; #define FASYNC_MAGIC 0x4601 /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); extern int fasync_remove_entry(struct file *, struct fasync_struct **); extern struct fasync_struct *fasync_alloc(void); extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where * represented in both. */ #define SB_RDONLY BIT(0) /* Mount read-only */ #define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */ #define SB_NODEV BIT(2) /* Disallow access to device special files */ #define SB_NOEXEC BIT(3) /* Disallow program execution */ #define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */ #define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */ #define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */ #define SB_NOATIME BIT(10) /* Do not update access times. */ #define SB_NODIRATIME BIT(11) /* Do not update directory access times */ #define SB_SILENT BIT(15) #define SB_POSIXACL BIT(16) /* Supports POSIX ACLs */ #define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ #define SB_I_VERSION BIT(23) /* Update inode I_version field */ #define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) #define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) #define SB_ACTIVE BIT(30) #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ #define SB_ENC_STRICT_MODE_FL (1 << 0) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) /* * Umount options */ #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ #define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ #define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ #define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 #define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ 
#define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ SB_FREEZE_WRITE = 1, /* Writes, dir ops, ioctls frozen */ SB_FREEZE_PAGEFAULT = 2, /* Page faults stopped as well */ SB_FREEZE_FS = 3, /* For internal FS use (e.g. to stop * internal threads if needed) */ SB_FREEZE_COMPLETE = 4, /* ->freeze_fs finished successfully */ }; #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1) struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ unsigned char s_blocksize_bits; unsigned long s_blocksize; loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_iflags; /* internal SB_I_* flags */ unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; int s_count; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; #endif const struct xattr_handler * const *s_xattr; #ifdef CONFIG_FS_ENCRYPTION const struct fscrypt_operations *s_cop; struct fscrypt_keyring *s_master_keys; /* master crypto keys in use */ #endif #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif #if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; unsigned int s_quota_types; /* Bitmask of supported quota types */ struct quota_info s_dquot; /* Diskquota specific options */ struct sb_writers s_writers; /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and * s_fsnotify_info together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ /* Granularity of c/m/atime in ns (cannot be worse than a second) */ u32 s_time_gran; /* Time limits for c/m/atime in seconds */ time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; #endif /* * q: why are s_id and s_sysfs_name not the same? 
both are human * readable strings that identify the filesystem * a: s_id is allowed to change at runtime; it's used in log messages, * and we want to when a device starts out as single device (s_id is dev * name) but then a device is hot added and we have to switch to * identifying it by UUID * but s_sysfs_name is a handle for programmatic access, and can't * change at runtime */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. */ struct mutex s_vfs_rename_mutex; /* Kludge */ /* * Filesystem subtype. If non-empty the filesystem type field * in /proc/mounts will be "type.subtype" */ const char *s_subtype; const struct dentry_operations *s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ errseq_t s_wb_err; /* AIO completions deferred from interrupt context */ struct workqueue_struct *s_dio_done_wq; struct hlist_head s_pins; /* * Owning user namespace and default context in which to * interpret filesystem uids, gids, quotas, device nodes, * xattrs and security labels. */ struct user_namespace *s_user_ns; /* * The list_lru structure is essentially just a pointer to a table * of per-node lru lists, each of which has its own spinlock. * There is no need to put them into separate cachelines. */ struct list_lru s_dentry_lru; struct list_lru s_inode_lru; struct rcu_head rcu; struct work_struct destroy_work; struct mutex s_sync_lock; /* sync serialisation lock */ /* * Indicates how deep in a filesystem stack this SB is */ int s_stack_depth; /* s_inode_list_lock protects s_inodes */ spinlock_t s_inode_list_lock ____cacheline_aligned_in_smp; struct list_head s_inodes; /* all inodes */ spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ } __randomize_layout; static inline struct user_namespace *i_user_ns(const struct inode *inode) { return inode->i_sb->s_user_ns; } /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can * instead deal with the raw numeric values that are stored * in the filesystem. */ static inline uid_t i_uid_read(const struct inode *inode) { return from_kuid(i_user_ns(inode), inode->i_uid); } static inline gid_t i_gid_read(const struct inode *inode) { return from_kgid(i_user_ns(inode), inode->i_gid); } static inline void i_uid_write(struct inode *inode, uid_t uid) { inode->i_uid = make_kuid(i_user_ns(inode), uid); } static inline void i_gid_write(struct inode *inode, gid_t gid) { inode->i_gid = make_kgid(i_user_ns(inode), gid); } /** * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: whe inode's i_uid mapped down according to @idmap. * If the inode's i_uid has no mapping INVALID_VFSUID is returned. 
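 *
 * (Illustrative use, not part of the original header: ownership checks
 * compare the mapped id against the caller's fsuid, e.g.
 *
 *	if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
 *		return true;	(the caller owns the inode)
 *
 * which is the pattern inode_owner_or_capable() uses.)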
*/ static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid); } /** * i_uid_needs_update - check whether inode's i_uid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the $inode's i_uid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_uid field needs to be updated, false if not. */ static inline bool i_uid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_UID) && !vfsuid_eq(attr->ia_vfsuid, i_uid_into_vfsuid(idmap, inode))); } /** * i_uid_update - update @inode's i_uid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_uid field translating the vfsuid of any idmapped * mount into the filesystem kuid. */ static inline void i_uid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_UID) inode->i_uid = from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid); } /** * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping * @idmap: idmap of the mount the inode was found from * @inode: inode to map * * Return: the inode's i_gid mapped down according to @idmap. * If the inode's i_gid has no mapping INVALID_VFSGID is returned. */ static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap, const struct inode *inode) { return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid); } /** * i_gid_needs_update - check whether inode's i_gid needs to be updated * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Check whether the $inode's i_gid field needs to be updated taking idmapped * mounts into account if the filesystem supports it. * * Return: true if @inode's i_gid field needs to be updated, false if not. */ static inline bool i_gid_needs_update(struct mnt_idmap *idmap, const struct iattr *attr, const struct inode *inode) { return ((attr->ia_valid & ATTR_GID) && !vfsgid_eq(attr->ia_vfsgid, i_gid_into_vfsgid(idmap, inode))); } /** * i_gid_update - update @inode's i_gid field * @idmap: idmap of the mount the inode was found from * @attr: the new attributes of @inode * @inode: the inode to update * * Safely update @inode's i_gid field translating the vfsgid of any idmapped * mount into the filesystem kgid. */ static inline void i_gid_update(struct mnt_idmap *idmap, const struct iattr *attr, struct inode *inode) { if (attr->ia_valid & ATTR_GID) inode->i_gid = from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid); } /** * inode_fsuid_set - initialize inode's i_uid field with callers fsuid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_uid field of @inode. If the inode was found/created via * an idmapped mount map the caller's fsuid according to @idmap. */ static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } /** * inode_fsgid_set - initialize inode's i_gid field with callers fsgid * @inode: inode to initialize * @idmap: idmap of the mount the inode was found from * * Initialize the i_gid field of @inode. 
If the inode was found/created via * an idmapped mount map the caller's fsgid according to @idmap. */ static inline void inode_fsgid_set(struct inode *inode, struct mnt_idmap *idmap) { inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); } /** * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped * @sb: the superblock we want a mapping in * @idmap: idmap of the relevant mount * * Check whether the caller's fsuid and fsgid have a valid mapping in the * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map * the caller's fsuid and fsgid according to the @idmap first. * * Return: true if fsuid and fsgid is mapped, false if not. */ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct mnt_idmap *idmap) { struct user_namespace *fs_userns = sb->s_user_ns; kuid_t kuid; kgid_t kgid; kuid = mapped_fsuid(idmap, fs_userns); if (!uid_valid(kuid)) return false; kgid = mapped_fsgid(idmap, fs_userns); if (!gid_valid(kgid)) return false; return kuid_has_mapping(fs_userns, kuid) && kgid_has_mapping(fs_userns, kgid); } struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), .tv_nsec = inode_get_atime_nsec(inode) }; return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { inode->i_atime_sec = ts.tv_sec; inode->i_atime_nsec = ts.tv_nsec; return ts; } static inline struct timespec64 inode_set_atime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { return inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), .tv_nsec = inode_get_mtime_nsec(inode) }; return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { inode->i_mtime_sec = ts.tv_sec; inode->i_mtime_nsec = ts.tv_nsec; return ts; } static inline struct timespec64 inode_set_mtime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_mtime_to_ts(inode, ts); } static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { return inode->i_ctime_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), .tv_nsec = inode_get_ctime_nsec(inode) }; return ts; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { inode->i_ctime_sec = ts.tv_sec; inode->i_ctime_nsec = ts.tv_nsec; return ts; } /** * inode_set_ctime - set the ctime in the inode * @inode: inode in which to set the ctime * @sec: tv_sec value to set * @nsec: tv_nsec value to set * * Set the ctime in @inode to { @sec, @nsec } */ static inline struct timespec64 
inode_set_ctime(struct inode *inode, time64_t sec, long nsec) { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; return inode_set_ctime_to_ts(inode, ts); } struct timespec64 simple_inode_init_ts(struct inode *inode); /* * Snapshotting support. */ /* * These are internal functions, please use sb_start_{write,pagefault,intwrite} * instead. */ static inline void __sb_end_write(struct super_block *sb, int level) { percpu_up_read(sb->s_writers.rw_sem + level-1); } static inline void __sb_start_write(struct super_block *sb, int level) { percpu_down_read(sb->s_writers.rw_sem + level - 1); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) { return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); } #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held * @sb: the super we write to * @level: the freeze level * * * > 0 - sb freeze level is held * * 0 - sb freeze level is not held * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN */ static inline int __sb_write_started(const struct super_block *sb, int level) { return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1); } /** * sb_write_started - check if SB_FREEZE_WRITE is held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE); } /** * sb_write_not_started - check if SB_FREEZE_WRITE is not held * @sb: the super we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. */ static inline bool sb_write_not_started(const struct super_block *sb) { return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0; } /** * file_write_started - check if SB_FREEZE_WRITE is held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. */ static inline bool file_write_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_started(file_inode(file)->i_sb); } /** * file_write_not_started - check if SB_FREEZE_WRITE is not held * @file: the file we write to * * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN. * May be false positive with !S_ISREG, because file_start_write() has * no effect on !S_ISREG. */ static inline bool file_write_not_started(const struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; return sb_write_not_started(file_inode(file)->i_sb); } /** * sb_end_write - drop write access to a superblock * @sb: the super we wrote to * * Decrement number of writers to the filesystem. Wake up possible waiters * wanting to freeze the filesystem. */ static inline void sb_end_write(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_WRITE); } /** * sb_end_pagefault - drop write access to a superblock from a page fault * @sb: the super we wrote to * * Decrement number of processes handling write page fault to the filesystem. * Wake up possible waiters wanting to freeze the filesystem. 
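 *
 * Illustrative sketch (not part of the original header): the usual pairing in
 * a filesystem's ->page_mkwrite() handler, which dirties a page and therefore
 * needs page fault freeze protection. "example_page_mkwrite" and
 * "example_make_folio_writable" are hypothetical names; struct vm_fault and
 * vm_fault_t come from the mm headers.
 *
 *	static vm_fault_t example_page_mkwrite(struct vm_fault *vmf)
 *	{
 *		struct inode *inode = file_inode(vmf->vma->vm_file);
 *		vm_fault_t ret;
 *
 *		sb_start_pagefault(inode->i_sb);
 *		ret = example_make_folio_writable(vmf);
 *		sb_end_pagefault(inode->i_sb);
 *		return ret;
 *	}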
*/ static inline void sb_end_pagefault(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_end_intwrite - drop write access to a superblock for internal fs purposes * @sb: the super we wrote to * * Decrement fs-internal number of writers to the filesystem. Wake up possible * waiters wanting to freeze the filesystem. */ static inline void sb_end_intwrite(struct super_block *sb) { __sb_end_write(sb, SB_FREEZE_FS); } /** * sb_start_write - get write access to a superblock * @sb: the super we write to * * When a process wants to write data or metadata to a file system (i.e. dirty * a page or an inode), it should embed the operation in a sb_start_write() - * sb_end_write() pair to get exclusion against file system freezing. This * function increments number of writers preventing freezing. If the file * system is already frozen, the function waits until the file system is * thawed. * * Since freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. Generally, * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write * -> i_mutex (write path, truncate, directory ops, ...) * -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_WRITE); } static inline bool sb_start_write_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** * sb_start_pagefault - get write access to a superblock from a page fault * @sb: the super we write to * * When a process starts handling write page fault, it should embed the * operation into sb_start_pagefault() - sb_end_pagefault() pair to get * exclusion against file system freezing. This is needed since the page fault * is going to dirty a page. This function increments number of running page * faults preventing freezing. If the file system is already frozen, the * function waits until the file system is thawed. * * Since page fault freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. It is advised to * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault * handling code implies lock dependency: * * mmap_lock * -> sb_start_pagefault */ static inline void sb_start_pagefault(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /** * sb_start_intwrite - get write access to a superblock for internal fs purposes * @sb: the super we write to * * This is the third level of protection against filesystem freezing. It is * free for use by a filesystem. The only requirement is that it must rank * below sb_start_pagefault. * * For example filesystem can call sb_start_intwrite() when starting a * transaction which somewhat eases handling of freezing for internal sources * of filesystem changes (internal fs threads, discarding preallocation on file * close, etc.). */ static inline void sb_start_intwrite(struct super_block *sb) { __sb_start_write(sb, SB_FREEZE_FS); } static inline bool sb_start_intwrite_trylock(struct super_block *sb) { return __sb_start_write_trylock(sb, SB_FREEZE_FS); } bool inode_owner_or_capable(struct mnt_idmap *idmap, const struct inode *inode); /* * VFS helper functions.. 
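 *
 * Illustrative sketch (not part of the original header) of the freeze
 * protection described above, for a kernel-internal path that dirties an
 * inode outside the normal write path; sb_start_write() blocks while the
 * filesystem is frozen. "example_touch_inode" is a hypothetical name.
 *
 *	static void example_touch_inode(struct inode *inode)
 *	{
 *		sb_start_write(inode->i_sb);
 *		inode_set_ctime_current(inode);
 *		mark_inode_dirty(inode);
 *		sb_end_write(inode->i_sb);
 *	}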
*/ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); int vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct inode **); /** * struct renamedata - contains all information required for renaming * @old_mnt_idmap: idmap of the old mount the inode was found from * @old_dir: parent of source * @old_dentry: source * @new_mnt_idmap: idmap of the new mount the inode was found from * @new_dir: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { struct mnt_idmap *old_mnt_idmap; struct inode *old_dir; struct dentry *old_dentry; struct mnt_idmap *new_mnt_idmap; struct inode *new_dir; struct dentry *new_dentry; struct inode **delegated_inode; unsigned int flags; } __randomize_layout; int vfs_rename(struct renamedata *); static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); } struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred); struct file *kernel_file_open(const struct path *path, int flags, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), void *); int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #else #define compat_ptr_ioctl NULL #endif /* * VFS file helper functions. */ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, const struct inode *dir, umode_t mode); extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. * Return 'true' to keep going and 'false' if there are no more entries. */ struct dir_context; typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; loff_t pos; }; /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. 
* * NOMMU_MAP_COPY: Copy can be mapped (MAP_PRIVATE) * NOMMU_MAP_DIRECT: Can be mapped directly (MAP_SHARED) * NOMMU_MAP_READ: Can be mapped for reading * NOMMU_MAP_WRITE: Can be mapped for writing * NOMMU_MAP_EXEC: Can be mapped for execution */ #define NOMMU_MAP_COPY 0x00000001 #define NOMMU_MAP_DIRECT 0x00000008 #define NOMMU_MAP_READ VM_MAYREAD #define NOMMU_MAP_WRITE VM_MAYWRITE #define NOMMU_MAP_EXEC VM_MAYEXEC #define NOMMU_VMFLAGS \ (NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC) /* * These flags control the behavior of the remap_file_range function pointer. * If it is called with len == 0 that means "remap to end of source file". * See Documentation/filesystems/vfs.rst for more details about this call. * * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate) * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request */ #define REMAP_FILE_DEDUP (1 << 0) #define REMAP_FILE_CAN_SHORTEN (1 << 1) /* * These flags signal that the caller is ok with altering various aspects of * the behavior of the remap operation. The changes must be made by the * implementation; the vfs remap helper functions can take advantage of them. * Flags in this category exist to preserve the quirky behavior of the hoisted * btrfs clone/dedupe ioctls. */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) /* * These flags control the behavior of vfs_copy_file_range(). * They are not available to the user via syscall. * * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops */ #define COPY_FILE_SPLICE (1 << 0) struct iov_iter; struct io_uring_cmd; struct offset_ctx; typedef unsigned int __bitwise fop_flags_t; struct file_operations { struct module *owner; fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, unsigned int flags); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); #ifndef CONFIG_MMU unsigned (*mmap_capabilities)(struct file *); #endif ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int); loff_t (*remap_file_range)(struct file *file_in, 
loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); } __randomize_layout; /* Supports async buffered reads */ #define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) /* Supports async buffered writes */ #define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1)) /* Supports synchronous page faults for mappings */ #define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) /* Supports non-exclusive O_DIRECT writes from multiple threads */ #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); #define WRAP_DIR_ITER(x) \ static int shared_##x(struct file *file , struct dir_context *ctx) \ { return wrap_directory_iterator(file, ctx, x); } struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct mnt_idmap *, struct inode *, int); struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); int (*create) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *); int (*getattr) (struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode); int (*tmpfile) (struct mnt_idmap *, struct inode *, struct file *, umode_t); struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *, int); int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { return file->f_op->mmap(file, vma); } extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write); int __generic_remap_file_range_prep(struct file *file_in, 
loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, const struct iomap_ops *dax_read_ops); int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, struct file *dst_file, loff_t dst_pos, loff_t len, unsigned int remap_flags); /** * enum freeze_holder - holder of the freeze * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the * same holder aren't allowed. It is however allowed to hold a single * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at * the same time. This is relied upon by some filesystems during online * repair or similar. */ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), }; struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); void (*free_inode)(struct inode *); void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_super) (struct super_block *, enum freeze_holder who); int (*freeze_fs) (struct super_block *); int (*thaw_super) (struct super_block *, enum freeze_holder who); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct dentry *); int (*show_devname)(struct seq_file *, struct dentry *); int (*show_path)(struct seq_file *, struct dentry *); int (*show_stats)(struct seq_file *, struct dentry *); #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); void (*shutdown)(struct super_block *sb); }; /* * Inode flags - they have no relation to superblock flags now */ #define S_SYNC (1 << 0) /* Writes are synced at once */ #define S_NOATIME (1 << 1) /* Do not update access times */ #define S_APPEND (1 << 2) /* Append-only file */ #define S_IMMUTABLE (1 << 3) /* Immutable file */ #define S_DEAD (1 << 4) /* removed, but still open directory */ #define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ #define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ #define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ #define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its 
bmaps */ #define S_PRIVATE (1 << 9) /* Inode is fs-internal */ #define S_IMA (1 << 10) /* Inode has an associated IMA struct */ #define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ #define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX #define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */ #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system * flags just means all the inodes inherit those flags by default. It might be * possible to override it selectively if you really wanted to with some * ioctl() that is not currently implemented. * * Exception: SB_RDONLY is always applied to the entire file system. * * Unfortunately, it is possible to change a filesystems flags with it mounted * with files in use. This means that all of the inodes will not have their * i_flags updated. Hence, i_flags no longer inherit the superblock mount * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) #define IS_DIRSYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, SB_MANDLOCK) #define IS_NOATIME(inode) __IS_FLG(inode, SB_RDONLY|SB_NOATIME) #define IS_I_VERSION(inode) __IS_FLG(inode, SB_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) #ifdef CONFIG_FS_POSIX_ACL #define IS_POSIXACL(inode) __IS_FLG(inode, SB_POSIXACL) #else #define IS_POSIXACL(inode) 0 #endif #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #ifdef CONFIG_SWAP #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) #else #define IS_SWAPFILE(inode) ((void)(inode), 0U) #endif #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) #define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_CASEFOLDED(inode) ((inode)->i_flags & S_CASEFOLD) #define IS_VERITY(inode) ((inode)->i_flags & S_VERITY) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) { return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)); } static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = filp->f_iocb_flags, .ki_ioprio = get_current_ioprio(), }; } static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, .ki_flags = 
kiocb_src->ki_flags, .ki_ioprio = kiocb_src->ki_ioprio, .ki_pos = kiocb_src->ki_pos, }; } /* * Inode state bits. Protected by inode->i_lock * * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at * various stages of removing an inode. * * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on * fdatasync() (unless I_DIRTY_DATASYNC is also set). * Timestamp updates are the usual cause. * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of * these changes separately from I_DIRTY_SYNC so that we * don't have to write inode on fdatasync() when only * e.g. the timestamps have changed. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. * I_DIRTY_TIME The inode itself has dirty timestamps, and the * lazytime mount option is enabled. We keep track of this * separately from I_DIRTY_SYNC in order to implement * lazytime. This gets cleared if I_DIRTY_INODE * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But * I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already * in place because writeback might already be in progress * and we don't want to lose the time update * I_NEW Serves as both a mutex and completion notification. * New inodes set I_NEW. If two processes both create * the same inode, one of them will release its inode and * wait for I_NEW to be released before returning. * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can * also cause waiting on I_NEW, without I_NEW actually * being set. find_inode() uses this to prevent returning * nearly-dead inodes. * I_WILL_FREE Must be set when calling write_inode_now() if i_count * is zero. I_FREEING must be set when I_WILL_FREE is * cleared. * I_FREEING Set when inode is about to be freed but still has dirty * pages or buffers attached or the inode itself is still * dirty. * I_CLEAR Added by clear_inode(). In this state the inode is * clean and can be destroyed. Inode keeps I_FREEING. * * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are * prohibited for many purposes. iget() must wait for * the inode to be completely released, then create it * anew. Other functions will just ignore such inodes, * if appropriate. I_NEW is used for waiting. * * I_SYNC Writeback of inode is running. The bit is set during * data writeback, and cleared with a wakeup on the bit * address once it is done. The bit is also used to pin * the inode in memory for flusher thread. * * I_REFERENCED Marks the inode as recently references on the LRU list. * * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See * inode_switch_wbs_work_fn() for details. * * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * and work dirs among overlayfs mounts. * * I_CREATING New object's inode in the middle of setting up. * * I_DONTCACHE Evict inode as soon as it is not used anymore. * * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. 
* * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding * i_count. * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) #define I_DIRTY_DATASYNC (1 << 1) #define I_DIRTY_PAGES (1 << 2) #define __I_NEW 3 #define I_NEW (1 << __I_NEW) #define I_WILL_FREE (1 << 4) #define I_FREEING (1 << 5) #define I_CLEAR (1 << 6) #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) #define __I_LRU_ISOLATING 19 #define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY); } static inline void mark_inode_dirty_sync(struct inode *inode) { __mark_inode_dirty(inode, I_DIRTY_SYNC); } /* * Returns true if the given inode itself only has dirty timestamps (its pages * may still be dirty) and isn't currently being allocated or freed. * Filesystems should call this if when writing an inode when lazytime is * enabled, they want to opportunistically write the timestamps of other inodes * located very nearby on-disk, e.g. in the same inode block. This returns true * if the given inode is in need of such an opportunistic update. Requires * i_lock, or at least later re-checking under i_lock. */ static inline bool inode_is_dirtytime_only(struct inode *inode) { return (inode->i_state & (I_DIRTY_TIME | I_NEW | I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME; } extern void inc_nlink(struct inode *inode); extern void drop_nlink(struct inode *inode); extern void clear_nlink(struct inode *inode); extern void set_nlink(struct inode *inode, unsigned int nlink); static inline void inode_inc_link_count(struct inode *inode) { inc_nlink(inode); mark_inode_dirty(inode); } static inline void inode_dec_link_count(struct inode *inode) { drop_nlink(inode); mark_inode_dirty(inode); } enum file_time_flags { S_ATIME = 1, S_MTIME = 2, S_CTIME = 4, S_VERSION = 8, }; extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); int inode_update_time(struct inode *inode, int flags); static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) touch_atime(&file->f_path); } extern int file_modified(struct file *file); int kiocb_modified(struct kiocb *iocb); int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 #define FS_HAS_SUBTYPE 4 #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; struct dentry *(*mount) (struct file_system_type *, int, const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct hlist_head fs_supers; struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; struct lock_class_key s_vfs_rename_key; struct lock_class_key s_writers_key[SB_FREEZE_LEVELS]; struct lock_class_key i_lock_key; struct lock_class_key i_mutex_key; struct lock_class_key invalidate_lock_key; struct lock_class_key i_mutex_dir_key; }; #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); int set_anon_super_fc(struct super_block *s, struct fs_context *fc); int get_anon_bdev(dev_t *); void free_anon_bdev(dev_t); struct super_block *sget_fc(struct fs_context *fc, int (*test)(struct super_block *, struct fs_context *), int (*set)(struct super_block *, struct fs_context *)); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), int flags, void *data); struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) #define fops_put(fops) \ do { if (fops) module_put((fops)->owner); } while(0) /* * This one is to be used *ONLY* from ->open() instances. * fops must be non-NULL, pinned down *and* module dependencies * should be sufficient to pin the caller down as well. 
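 *
 * Illustrative sketch (not part of the original header) of that usage,
 * assuming the caller already holds a reference on the replacement ops;
 * "example_open" and "example_real_fops" (a const struct file_operations)
 * are hypothetical names.
 *
 *	static int example_open(struct inode *inode, struct file *file)
 *	{
 *		replace_fops(file, &example_real_fops);
 *		if (file->f_op->open)
 *			return file->f_op->open(inode, file);
 *		return 0;
 *	}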
*/ #define replace_fops(f, fops) \ do { \ struct file *__file = (f); \ fops_put(__file->f_op); \ BUG_ON(!(__file->f_op = (fops))); \ } while(0) extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); int freeze_super(struct super_block *super, enum freeze_holder who); int thaw_super(struct super_block *super, enum freeze_holder who); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) { if (WARN_ON(len > sizeof(sb->s_uuid))) len = sizeof(sb->s_uuid); sb->s_uuid_len = len; memcpy(&sb->s_uuid, uuid, len); } /* set sb sysfs name based on sb->s_bdev */ static inline void super_set_sysfs_name_bdev(struct super_block *sb) { snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); } /* set sb sysfs name based on sb->s_uuid */ static inline void super_set_sysfs_name_uuid(struct super_block *sb) { WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); } /* set sb sysfs name based on sb->s_id */ static inline void super_set_sysfs_name_id(struct super_block *sb) { strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); } /* try to use something standard before you use this */ __printf(2, 3) static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) { va_list args; va_start(args, fmt); vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); va_end(args); } extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); int inode_update_timestamps(struct inode *inode, int flags); int generic_update_time(struct inode *, int); /* /sys/fs */ extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_MASK) /* fs/open.c */ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ atomic_t refcnt; struct audit_names *aname; const char iname[]; }; static_assert(offsetof(struct filename, iname) % sizeof(long) == 0); static inline struct mnt_idmap *file_mnt_idmap(const struct file *file) { return mnt_idmap(file->f_path.mnt); } /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * * If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not. 
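 *
 * Illustrative sketch (not part of the original header): a filesystem that
 * does not support idmapped mounts for a given operation can bail out early;
 * "file" is assumed to be the struct file being operated on.
 *
 *	if (is_idmapped_mnt(file->f_path.mnt))
 *		return -EOPNOTSUPP;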
*/ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { return mnt_idmap(mnt) != &nop_mnt_idmap; } extern long vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, const char *, int, umode_t); static inline struct file *file_open_root_mnt(struct vfsmount *mnt, const char *name, int flags, umode_t mode) { return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, name, flags, mode); } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file * stored in ->vm_file is a backing file whose f_inode is on the underlying * filesystem. When the mapped file path and inode number are displayed to * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the * path and inode number to display to the user, which is the path of the fd * that user has requested to map and the inode number that would be returned * by fstat() on that same fd. */ /* Get the path to display in /proc/<pid>/maps */ static inline const struct path *file_user_path(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } /* Get the inode whose inode number to display in /proc/<pid>/maps */ static inline const struct inode *file_user_inode(struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return d_inode(backing_file_user_path(f)->dentry); return file_inode(f); } static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred); } extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern int finish_no_open(struct file *file, struct dentry *dentry); /* Helper for the simple case when original dentry is used */ static inline int finish_open_simple(struct file *file, int error) { if (error) return error; return finish_open(file, file->f_path.dentry, NULL); } /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) extern struct super_block *blockdev_superblock; static inline bool sb_is_blkdev_sb(struct super_block *sb) { return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations 
def_chr_fops; /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 /* Marks the bottom of the first segment of free char majors */ #define CHRDEV_MAJOR_DYN_END 234 /* Marks the top and bottom of the second segment of free char majors */ #define CHRDEV_MAJOR_DYN_EXT_START 511 #define CHRDEV_MAJOR_DYN_EXT_END 384 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); extern int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops); extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); static inline int register_chrdev(unsigned int major, const char *name, const struct file_operations *fops) { return __register_chrdev(major, 0, 256, name, fops); } static inline void unregister_chrdev(unsigned int major, const char *name) { __unregister_chrdev(major, 0, 256, name); } extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, loff_t lend); extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); static inline int file_write_and_wait(struct file *file) { return file_write_and_wait_range(file, 0, LLONG_MAX); } extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, unsigned int flags); static inline bool iocb_is_dsync(const struct kiocb *iocb) { return (iocb->ki_flags & IOCB_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host); } /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount * of bytes passed in, or an error if syncing the file failed. */ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) { if (iocb_is_dsync(iocb)) { int ret = vfs_fsync_range(iocb->ki_filp, iocb->ki_pos - count, iocb->ki_pos - 1, (iocb->ki_flags & IOCB_SYNC) ? 
0 : 1);
		if (ret)
			return ret;
	}
	return count;
}

extern void emergency_sync(void);
extern void emergency_remount(void);

#ifdef CONFIG_BLOCK
extern int bmap(struct inode *inode, sector_t *block);
#else
static inline int bmap(struct inode *inode, sector_t *block)
{
	return -EINVAL;
}
#endif

int notify_change(struct mnt_idmap *, struct dentry *, struct iattr *, struct inode **);
int inode_permission(struct mnt_idmap *, struct inode *, int);
int generic_permission(struct mnt_idmap *, struct inode *, int);

static inline int file_permission(struct file *file, int mask)
{
	return inode_permission(file_mnt_idmap(file), file_inode(file), mask);
}

static inline int path_permission(const struct path *path, int mask)
{
	return inode_permission(mnt_idmap(path->mnt), d_inode(path->dentry), mask);
}

int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode);

static inline bool execute_ok(struct inode *inode)
{
	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
}

static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
{
	return (inode->i_mode ^ mode) & S_IFMT;
}

/**
 * file_start_write - get write access to a superblock for regular file io
 * @file: the file we want to write to
 *
 * This is a variant of sb_start_write() which is a noop on non-regular files.
 * Should be matched with a call to file_end_write().
 */
static inline void file_start_write(struct file *file)
{
	if (!S_ISREG(file_inode(file)->i_mode))
		return;
	sb_start_write(file_inode(file)->i_sb);
}

static inline bool file_start_write_trylock(struct file *file)
{
	if (!S_ISREG(file_inode(file)->i_mode))
		return true;
	return sb_start_write_trylock(file_inode(file)->i_sb);
}

/**
 * file_end_write - drop write access to a superblock of a regular file
 * @file: the file we wrote to
 *
 * Should be matched with a call to file_start_write().
 */
static inline void file_end_write(struct file *file)
{
	if (!S_ISREG(file_inode(file)->i_mode))
		return;
	sb_end_write(file_inode(file)->i_sb);
}

/**
 * kiocb_start_write - get write access to a superblock for async file io
 * @iocb: the io context we want to submit the write with
 *
 * This is a variant of sb_start_write() for async io submission.
 * Should be matched with a call to kiocb_end_write().
 */
static inline void kiocb_start_write(struct kiocb *iocb)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	sb_start_write(inode->i_sb);
	/*
	 * Fool lockdep by telling it the lock got released so that it
	 * doesn't complain about the held lock when we return to userspace.
	 */
	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
}

/**
 * kiocb_end_write - drop write access to a superblock after async file io
 * @iocb: the io context we submitted the write with
 *
 * Should be matched with a call to kiocb_start_write().
 */
static inline void kiocb_end_write(struct kiocb *iocb)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/*
	 * Tell lockdep we inherited freeze protection from submission thread.
	 */
	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
	sb_end_write(inode->i_sb);
}

/*
 * This is used for regular files where some users -- especially the
 * currently executed binary in a process, previously handled via
 * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
 * read-write shared) accesses.
 *
 * get_write_access() gets write permission for a file.
 * put_write_access() releases this write permission.
 * deny_write_access() denies write access to a file.
 * allow_write_access() re-enables write access to a file.
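 *
 * Illustrative sketch (not part of the original header), loosely modelled on
 * how exec-like code pins a file against writers: deny_write_access() fails
 * with -ETXTBSY if the file is already open for writing.
 * "example_with_writes_denied" is a hypothetical name.
 *
 *	static int example_with_writes_denied(struct file *file,
 *					      int (*fn)(struct file *))
 *	{
 *		int err = deny_write_access(file);
 *
 *		if (err)
 *			return err;
 *		err = fn(file);
 *		allow_write_access(file);
 *		return err;
 *	}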
* * The i_writecount field of an inode can have the following values: * 0: no write access, no denied write access * < 0: (-i_writecount) users that denied write access to the file. * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to * use {get,deny}_write_access() - these functions check the sign and refuse * to do the change if sign is wrong. */ static inline int get_write_access(struct inode *inode) { return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline int deny_write_access(struct file *file) { struct inode *inode = file_inode(file); return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY; } static inline void put_write_access(struct inode * inode) { atomic_dec(&inode->i_writecount); } static inline void allow_write_access(struct file *file) { if (file) atomic_inc(&file_inode(file)->i_writecount); } static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; } #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) static inline void i_readcount_dec(struct inode *inode) { BUG_ON(atomic_dec_return(&inode->i_readcount) < 0); } static inline void i_readcount_inc(struct inode *inode) { atomic_inc(&inode->i_readcount); } #else static inline void i_readcount_dec(struct inode *inode) { return; } static inline void i_readcount_inc(struct inode *inode) { return; } #endif extern int do_pipe_flags(int *, int); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern bool is_subdir(struct dentry *, struct dentry *); extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); /** * is_dot_dotdot - returns true only if @name is "." or ".." 
* @name: file name to check * @len: length of file name, in bytes */ static inline bool is_dot_dotdot(const char *name, size_t len) { return len && unlikely(name[0] == '.') && (len == 1 || (len == 2 && name[1] == '.')); } #include <linux/err.h> /* needed for stackable file system support */ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern int generic_delete_inode(struct inode *inode); static inline int generic_drop_inode(struct inode *inode) { return !inode->i_nlink || inode_unhashed(inode); } extern void d_mark_dontcache(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data); extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); struct inode *iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); struct inode *iget5_locked_rcu(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, int (*match)(struct inode *, unsigned long, void *), void *data); extern struct inode *find_inode_rcu(struct super_block *, unsigned long, int (*)(struct inode *, void *), void *); extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void lockdep_annotate_inode_mutex_key(struct inode *inode); #else static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; #endif extern void unlock_new_inode(struct inode *); extern void discard_new_inode(struct inode *); extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); void dump_mapping(const struct address_space *); /* * Userspace may rely on the inode number being non-zero. For example, glibc * simply ignores files with zero i_ino in unlink() and other places. * * As an additional complication, if userspace was compiled with * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the * lower 32 bits, so we need to check that those aren't zero explicitly. With * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but * better safe than sorry. 
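 *
 * Illustrative sketch (not part of the original header): a sanity check a
 * filesystem might apply before exposing a freshly allocated inode number;
 * "example_new_ino" is a hypothetical allocator.
 *
 *	ino_t ino = example_new_ino(sb);
 *
 *	if (WARN_ON_ONCE(is_zero_ino(ino)))
 *		return -ENOSPC;
 *	inode->i_ino = ino;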
*/ static inline bool is_zero_ino(ino_t ino) { return (u32)ino == 0; } extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); /* * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp) extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } extern void __remove_inode_hash(struct inode *); static inline void remove_inode_hash(struct inode *inode) { if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash)) __remove_inode_hash(inode); } extern void inode_sb_list_add(struct inode *inode); extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to, ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *); ssize_t generic_perform_write(struct kiocb *, struct iov_iter *); ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter, ssize_t direct_written, ssize_t buffered_written); ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, rwf_t flags); ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, struct iov_iter *iter); ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); ssize_t copy_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); #define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t 
generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); extern loff_t no_seek_end_llseek(struct file *, loff_t, int); int rw_verify_area(int, struct file *, const loff_t *, size_t); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); extern int stream_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { /* need locking between buffered and direct access */ DIO_LOCKING = 0x01, /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, }; ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, dio_iodone_t end_io, int flags); static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct iov_iter *iter, get_block_t get_block) { return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } #endif void inode_dio_wait(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O request * @inode: inode the direct I/O happens on * * This is called at the start of a direct I/O request and accounts for the * pending I/O, so that inode_dio_wait() can wait for all outstanding direct * I/O on the inode to complete. */ static inline void inode_dio_begin(struct inode *inode) { atomic_inc(&inode->i_dio_count); } /** * inode_dio_end - signal finish of a direct I/O request * @inode: inode the direct I/O happens on * * This is called once we've finished processing a direct I/O request, * and is used to wake up callers waiting for direct I/O to be quiesced.
*/ static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } extern void inode_set_flags(struct inode *inode, unsigned int flags, unsigned int mask); extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); static inline loff_t __inode_get_bytes(struct inode *inode) { return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes; } loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); const char *simple_get_link(struct dentry *, struct inode *, struct delayed_call *); extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags); int vfs_fstat(int fd, struct kstat *stat); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, filename, stat, 0); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); } extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern int simple_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int 
simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int simple_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct page **pagep, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { struct maple_tree mt; unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_empty(struct dentry *dentry); int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); void simple_offset_destroy(struct offset_ctx *octx); extern const struct file_operations simple_offset_dir_operations; extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); extern int generic_ci_match(const struct inode *parent, const struct qstr *name, const struct qstr *folded_name, const u8 *de_name, u32 de_name_len); static inline bool sb_has_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) return !!sb->s_encoding; #else return false; #endif } int may_setattr(struct mnt_idmap *idmap, 
struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); void setattr_copy(struct mnt_idmap *, struct inode *inode, const struct iattr *attr); extern int file_update_time(struct file *file); static inline bool vma_is_dax(const struct vm_area_struct *vma) { return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; inode = file_inode(vma->vm_file); if (S_ISCHR(inode->i_mode)) return false; /* device-dax */ return true; } static inline int iocb_flags(struct file *file) { int res = 0; if (file->f_flags & O_APPEND) res |= IOCB_APPEND; if (file->f_flags & O_DIRECT) res |= IOCB_DIRECT; if (file->f_flags & O_DSYNC) res |= IOCB_DSYNC; if (file->f_flags & __O_SYNC) res |= IOCB_SYNC; return res; } static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, int rw_type) { int kiocb_flags = 0; /* make sure there's no overlap between RWF and private IOCB flags */ BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD); if (!flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; kiocb_flags |= IOCB_NOIO; } if (flags & RWF_ATOMIC) { if (rw_type != WRITE) return -EOPNOTSUPP; if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { if (IS_APPEND(file_inode(ki->ki_filp))) return -EPERM; ki->ki_flags &= ~IOCB_APPEND; } ki->ki_flags |= kiocb_flags; return 0; } /* Transaction based IO helpers */ /* * An argresp is stored in an allocated page and holds the * size of the argument or response, along with its content */ struct simple_transaction_argresp { ssize_t size; char data[]; }; #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp)) char *simple_transaction_get(struct file *file, const char __user *buf, size_t size); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos); int simple_transaction_release(struct inode *inode, struct file *file); void simple_transaction_set(struct file *file, size_t n); /* * simple attribute files * * These attributes behave similar to those in sysfs: * * Writing to an attribute immediately sets a value, an open file can be * written to multiple times. * * Reading from an attribute creates a buffer from the value that might get * read with multiple read calls. When the attribute has been read * completely, no further read calls are possible until the file is opened * again. * * All attributes contain a text representation of a numeric value * that are accessed with the get() and set() functions. 
*/ #define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ .write = (__is_signed) ? simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) #define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) { /* don't do anything, just let the compiler check the arguments; */ } int simple_attr_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt); int simple_attr_release(struct inode *inode, struct file *file); ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) #define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) static inline bool is_sxid(umode_t mode) { return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { if (!(dir->i_mode & S_ISVTX)) return 0; return __check_sticky(idmap, dir, inode); } static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC)) inode->i_flags |= S_NOSEC; } static inline bool is_root_inode(struct inode *inode) { return inode == inode->i_sb->s_root->d_inode; } static inline bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type) { return ctx->actor(ctx, name, namelen, ctx->pos, ino, type); } static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, ".", 1, ctx->pos, file->f_path.dentry->d_inode->i_ino, DT_DIR); } static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, d_parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) return false; ctx->pos = 1; } if (ctx->pos == 1) { if (!dir_emit_dotdot(file, ctx)) return false; ctx->pos = 2; } return true; } static inline bool dir_relax(struct inode *inode) { inode_unlock(inode); inode_lock(inode); return !IS_DEADDIR(inode); } static inline bool dir_relax_shared(struct inode *inode) { inode_unlock_shared(inode); inode_lock_shared(inode); return !IS_DEADDIR(inode); } extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); /* mm/fadvise.c */ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice); extern int 
generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); static inline bool vfs_empty_path(int dfd, const char __user *path) { char c; if (dfd < 0) return false; /* We now allow NULL to be used for empty path. */ if (!path) return true; if (unlikely(get_user(c, path))) return false; return !c; } bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos); #endif /* _LINUX_FS_H */
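/*
 * Illustrative sketch, not part of <linux/fs.h>: a typical use of the
 * DEFINE_SIMPLE_ATTRIBUTE() helper declared above. The identifiers
 * example_value, example_get, example_set and example_fops are
 * hypothetical; only the get()/set() prototypes and the macro itself come
 * from the "simple attribute files" interface documented in the comment
 * above.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/types.h>

static u64 example_value;

/* get(): report the current value; a read formats it with "%llu\n". */
static int example_get(void *data, u64 *val)
{
	*val = example_value;
	return 0;
}

/* set(): each write to the attribute immediately stores the new value. */
static int example_set(void *data, u64 val)
{
	example_value = val;
	return 0;
}

/*
 * Expands to example_fops_open() plus a const struct file_operations
 * example_fops wired to simple_attr_open/read/write/release. It would then
 * typically be handed to something like
 * debugfs_create_file("example", 0644, parent, NULL, &example_fops).
 */
DEFINE_SIMPLE_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");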
// SPDX-License-Identifier: GPL-2.0 /* * Creating audit events from TTY input. * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * * Authors: Miloslav Trmac <mitr@redhat.com> */ #include <linux/audit.h> #include <linux/slab.h> #include <linux/tty.h> #include "tty.h" struct tty_audit_buf { struct mutex mutex; /* Protects all data below */ dev_t dev; /* The TTY which the data is from */ bool icanon; size_t valid; u8 *data; /* Allocated size N_TTY_BUF_SIZE */ }; static struct tty_audit_buf *tty_audit_buf_ref(void) { struct tty_audit_buf *buf; buf = current->signal->tty_audit_buf; WARN_ON(buf == ERR_PTR(-ESRCH)); return buf; } static struct tty_audit_buf *tty_audit_buf_alloc(void) { struct tty_audit_buf *buf; buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) goto err; buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; mutex_init(&buf->mutex); return buf; err_buf: kfree(buf); err: return NULL; } static void tty_audit_buf_free(struct tty_audit_buf *buf) { WARN_ON(buf->valid != 0); kfree(buf->data); kfree(buf); } static void tty_audit_log(const char *description, dev_t dev, const u8 *data, size_t size) { struct audit_buffer *ab; pid_t pid = task_pid_nr(current); uid_t uid = from_kuid(&init_user_ns, task_uid(current)); uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current)); unsigned int sessionid = audit_get_sessionid(current); char name[TASK_COMM_LEN]; ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_TTY); if (!ab) return; audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u major=%d minor=%d comm=", description, pid, uid, loginuid, sessionid, MAJOR(dev), MINOR(dev)); get_task_comm(name, current); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); audit_log_n_hex(ab, data, size); audit_log_end(ab); } /* * tty_audit_buf_push - Push buffered data out * * Generate an audit message from the contents of @buf, which is owned by * the current task. @buf->mutex must be locked. */ static void tty_audit_buf_push(struct tty_audit_buf *buf) { if (buf->valid == 0) return; if (audit_enabled == AUDIT_OFF) { buf->valid = 0; return; } tty_audit_log("tty", buf->dev, buf->data, buf->valid); buf->valid = 0; } /** * tty_audit_exit - Handle a task exit * * Make sure all buffered data is written out and deallocate the buffer. * Only needs to be called if current->signal->tty_audit_buf != %NULL. * * The process is single-threaded at this point; no other threads share * current->signal.
*/ void tty_audit_exit(void) { struct tty_audit_buf *buf; buf = xchg(&current->signal->tty_audit_buf, ERR_PTR(-ESRCH)); if (!buf) return; tty_audit_buf_push(buf); tty_audit_buf_free(buf); } /* * tty_audit_fork - Copy TTY audit state for a new task * * Set up TTY audit state in @sig from current. @sig needs no locking. */ void tty_audit_fork(struct signal_struct *sig) { sig->audit_tty = current->signal->audit_tty; } /* * tty_audit_tiocsti - Log TIOCSTI */ void tty_audit_tiocsti(const struct tty_struct *tty, u8 ch) { dev_t dev; dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (tty_audit_push()) return; if (audit_enabled) tty_audit_log("ioctl=TIOCSTI", dev, &ch, 1); } /* * tty_audit_push - Flush current's pending audit data * * Returns 0 if success, -EPERM if tty audit is disabled */ int tty_audit_push(void) { struct tty_audit_buf *buf; if (~current->signal->audit_tty & AUDIT_TTY_ENABLE) return -EPERM; buf = tty_audit_buf_ref(); if (!IS_ERR_OR_NULL(buf)) { mutex_lock(&buf->mutex); tty_audit_buf_push(buf); mutex_unlock(&buf->mutex); } return 0; } /* * tty_audit_buf_get - Get an audit buffer. * * Get an audit buffer, allocate it if necessary. Return %NULL * if out of memory or ERR_PTR(-ESRCH) if tty_audit_exit() has already * occurred. Otherwise, return a new reference to the buffer. */ static struct tty_audit_buf *tty_audit_buf_get(void) { struct tty_audit_buf *buf; buf = tty_audit_buf_ref(); if (buf) return buf; buf = tty_audit_buf_alloc(); if (buf == NULL) { audit_log_lost("out of memory in TTY auditing"); return NULL; } /* Race to use this buffer, free it if another wins */ if (cmpxchg(&current->signal->tty_audit_buf, NULL, buf) != NULL) tty_audit_buf_free(buf); return tty_audit_buf_ref(); } /* * tty_audit_add_data - Add data for TTY auditing. * * Audit @data of @size from @tty, if necessary. */ void tty_audit_add_data(const struct tty_struct *tty, const void *data, size_t size) { struct tty_audit_buf *buf; unsigned int audit_tty; bool icanon = L_ICANON(tty); dev_t dev; audit_tty = READ_ONCE(current->signal->audit_tty); if (~audit_tty & AUDIT_TTY_ENABLE) return; if (unlikely(size == 0)) return; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) return; if ((~audit_tty & AUDIT_TTY_LOG_PASSWD) && icanon && !L_ECHO(tty)) return; buf = tty_audit_buf_get(); if (IS_ERR_OR_NULL(buf)) return; mutex_lock(&buf->mutex); dev = MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; if (buf->dev != dev || buf->icanon != icanon) { tty_audit_buf_push(buf); buf->dev = dev; buf->icanon = icanon; } do { size_t run; run = N_TTY_BUF_SIZE - buf->valid; if (run > size) run = size; memcpy(buf->data + buf->valid, data, run); buf->valid += run; data += run; size -= run; if (buf->valid == N_TTY_BUF_SIZE) tty_audit_buf_push(buf); } while (size != 0); mutex_unlock(&buf->mutex); }
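/*
 * Illustrative sketch, not part of tty_audit.c: the accumulate-and-flush
 * loop used by tty_audit_add_data() above, reduced to plain C so the
 * "push the buffer once it fills" behaviour is easy to follow. BUF_SIZE,
 * audit_buf, buf_valid, flush_buf() and add_data() are hypothetical
 * stand-ins for N_TTY_BUF_SIZE, buf->data, buf->valid,
 * tty_audit_buf_push() and tty_audit_add_data().
 */
#include <stddef.h>
#include <string.h>

#define BUF_SIZE 4096

static unsigned char audit_buf[BUF_SIZE];
static size_t buf_valid;

/* In the driver, this is where tty_audit_log() would emit a record. */
static void flush_buf(void)
{
	buf_valid = 0;
}

static void add_data(const unsigned char *data, size_t size)
{
	while (size) {
		/* Copy at most the space remaining in the buffer. */
		size_t run = BUF_SIZE - buf_valid;

		if (run > size)
			run = size;
		memcpy(audit_buf + buf_valid, data, run);
		buf_valid += run;
		data += run;
		size -= run;
		/* A full buffer is pushed out before accepting more input. */
		if (buf_valid == BUF_SIZE)
			flush_buf();
	}
}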
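/*
 * Illustrative sketch, not part of tty_audit.c: the "allocate, then race
 * to install" pattern used by tty_audit_buf_get() above, shown with C11
 * atomics instead of the kernel's cmpxchg(). The names shared_buf,
 * alloc_buf(), free_buf() and get_buf() are hypothetical.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct buf { int placeholder; };

static _Atomic(struct buf *) shared_buf;

static struct buf *alloc_buf(void)
{
	return calloc(1, sizeof(struct buf));
}

static void free_buf(struct buf *b)
{
	free(b);
}

static struct buf *get_buf(void)
{
	struct buf *expected = NULL;
	struct buf *b = atomic_load(&shared_buf);

	if (b)
		return b;	/* Another caller already installed a buffer. */

	b = alloc_buf();
	if (!b)
		return NULL;

	/* Race to publish; if another thread won, free our copy and use theirs. */
	if (!atomic_compare_exchange_strong(&shared_buf, &expected, b)) {
		free_buf(b);
		b = expected;
	}
	return b;
}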
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * Copyright (C) 2017-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * Copyright Matt Mackall <mpm@selenic.com>, 2003, 2004, 2005 * Copyright Theodore Ts'o, 1994, 1995, 1996, 1997, 1998, 1999. All rights reserved. * * This driver produces cryptographically secure pseudorandom data. It is divided * into roughly six sections, each with a section header: * * - Initialization and readiness waiting. * - Fast key erasure RNG, the "crng". * - Entropy accumulation and extraction routines. * - Entropy collection routines. * - Userspace reader/writer interfaces. * - Sysctl interface. * * The high level overview is that there is one input pool, into which * various pieces of data are hashed. Prior to initialization, some of that * data is then "credited" as having a certain number of bits of entropy. * When enough bits of entropy are available, the hash is finalized and * handed as a key to a stream cipher that expands it indefinitely for * various consumers. This key is periodically refreshed as the various * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/utsname.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/string.h> #include <linux/fcntl.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/nodemask.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/percpu.h> #include <linux/ptrace.h> #include <linux/workqueue.h> #include <linux/irq.h> #include <linux/ratelimit.h> #include <linux/syscalls.h> #include <linux/completion.h> #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/suspend.h> #include <linux/siphash.h> #include <linux/sched/isolation.h> #include <crypto/chacha.h> #include <crypto/blake2s.h> #ifdef CONFIG_VDSO_GETRANDOM #include <vdso/getrandom.h> #include <vdso/datapage.h> #endif #include <asm/archrandom.h> #include <asm/processor.h> #include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/io.h> /********************************************************************* * * Initialization and readiness waiting. * * Much of the RNG infrastructure is devoted to various dependencies * being able to wait until the RNG has collected enough entropy and * is ready for safe consumption. * *********************************************************************/ /* * crng_init is protected by base_crng->lock, and only increases * its value (from empty->early->ready).
*/ static enum { CRNG_EMPTY = 0, /* Little to no entropy collected */ CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ } crng_init __read_mostly = CRNG_EMPTY; static DEFINE_STATIC_KEY_FALSE(crng_is_ready); #define crng_ready() (static_branch_likely(&crng_is_ready) || crng_init >= CRNG_READY) /* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; static ATOMIC_NOTIFIER_HEAD(random_ready_notifier); /* Control how we warn userspace. */ static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE); static int ratelimit_disable __read_mostly = IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); /* * Returns whether or not the input pool has been seeded and thus guaranteed * to supply cryptographically secure random numbers. This applies to: the * /dev/urandom device, the get_random_bytes function, and the get_random_{u8, * u16,u32,u64,long} family of functions. * * Returns: true if the input pool has been seeded. * false if the input pool has not been seeded. */ bool rng_is_initialized(void) { return crng_ready(); } EXPORT_SYMBOL(rng_is_initialized); static void __cold crng_set_ready(struct work_struct *work) { static_branch_enable(&crng_is_ready); } /* Used by wait_for_random_bytes(), and considered an entropy collector, below. */ static void try_to_generate_entropy(void); /* * Wait for the input pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom * device, the get_random_bytes function, and the get_random_{u8,u16,u32,u64, * long} family of functions. Using any of these functions without first * calling this function forfeits the guarantee of security. * * Returns: 0 if the input pool has been seeded. * -ERESTARTSYS if the function was interrupted by a signal. */ int wait_for_random_bytes(void) { while (!crng_ready()) { int ret; try_to_generate_entropy(); ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); if (ret) return ret > 0 ? 0 : ret; } return 0; } EXPORT_SYMBOL(wait_for_random_bytes); /* * Add a callback function that will be invoked when the crng is initialised, * or immediately if it already has been. Only use this is you are absolutely * sure it is required. Most users should instead be able to test * `rng_is_initialized()` on demand, or make use of `get_random_bytes_wait()`. */ int __cold execute_with_initialized_rng(struct notifier_block *nb) { unsigned long flags; int ret = 0; spin_lock_irqsave(&random_ready_notifier.lock, flags); if (crng_ready()) nb->notifier_call(nb, 0, NULL); else ret = raw_notifier_chain_register((struct raw_notifier_head *)&random_ready_notifier.head, nb); spin_unlock_irqrestore(&random_ready_notifier.lock, flags); return ret; } #define warn_unseeded_randomness() \ if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ __func__, (void *)_RET_IP_, crng_init) /********************************************************************* * * Fast key erasure RNG, the "crng". 
* * These functions expand entropy from the entropy extractor into * long streams for external consumption using the "fast key erasure" * RNG described at <https://blog.cr.yp.to/20170723-random.html>. * * There are a few exported interfaces for use by other drivers: * * void get_random_bytes(void *buf, size_t len) * u8 get_random_u8() * u16 get_random_u16() * u32 get_random_u32() * u32 get_random_u32_below(u32 ceil) * u32 get_random_u32_above(u32 floor) * u32 get_random_u32_inclusive(u32 floor, u32 ceil) * u64 get_random_u64() * unsigned long get_random_long() * * These interfaces will return the requested number of random bytes * into the given buffer or as a return value. This is equivalent to * a read from /dev/urandom. The u8, u16, u32, u64, long family of * functions may be higher performance for one-off random integers, * because they do a bit of buffering and do not invoke reseeding * until the buffer is emptied. * *********************************************************************/ enum { CRNG_RESEED_START_INTERVAL = HZ, CRNG_RESEED_INTERVAL = 60 * HZ }; static struct { u8 key[CHACHA_KEY_SIZE] __aligned(__alignof__(long)); unsigned long generation; spinlock_t lock; } base_crng = { .lock = __SPIN_LOCK_UNLOCKED(base_crng.lock) }; struct crng { u8 key[CHACHA_KEY_SIZE]; unsigned long generation; local_lock_t lock; }; static DEFINE_PER_CPU(struct crng, crngs) = { .generation = ULONG_MAX, .lock = INIT_LOCAL_LOCK(crngs.lock), }; /* * Return the interval until the next reseeding, which is normally * CRNG_RESEED_INTERVAL, but during early boot, it is at an interval * proportional to the uptime. */ static unsigned int crng_reseed_interval(void) { static bool early_boot = true; if (unlikely(READ_ONCE(early_boot))) { time64_t uptime = ktime_get_seconds(); if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) WRITE_ONCE(early_boot, false); else return max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } return CRNG_RESEED_INTERVAL; } /* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. */ static void extract_entropy(void *buf, size_t len); /* This extracts a new crng key from the input pool. */ static void crng_reseed(struct work_struct *work) { static DECLARE_DELAYED_WORK(next_reseed, crng_reseed); unsigned long flags; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; /* Immediately schedule the next reseeding, so that it fires sooner rather than later. */ if (likely(system_unbound_wq)) queue_delayed_work(system_unbound_wq, &next_reseed, crng_reseed_interval()); extract_entropy(key, sizeof(key)); /* * We copy the new key into the base_crng, overwriting the old one, * and update the generation counter. We avoid hitting ULONG_MAX, * because the per-cpu crngs are initialized to ULONG_MAX, so this * forces new CPUs that come online to always initialize. */ spin_lock_irqsave(&base_crng.lock, flags); memcpy(base_crng.key, key, sizeof(base_crng.key)); next_gen = base_crng.generation + 1; if (next_gen == ULONG_MAX) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); #ifdef CONFIG_VDSO_GETRANDOM /* base_crng.generation's invalid value is ULONG_MAX, while * _vdso_rng_data.generation's invalid value is 0, so add one to the * former to arrive at the latter. Use smp_store_release so that this * is ordered with the write above to base_crng.generation. Pairs with * the smp_rmb() before the syscall in the vDSO code. 
*/ smp_store_release(&_vdso_rng_data.generation, next_gen + 1); #endif if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); } /* * This generates a ChaCha block using the provided key, and then * immediately overwrites that key with half the block. It returns * the resultant ChaCha state to the user, along with the second * half of the block containing 32 bytes of random data that may * be used; random_data_len may not be greater than 32. * * The returned ChaCha state contains within it a copy of the old * key value, at index 4, so the state should always be zeroed out * immediately after using in order to maintain forward secrecy. * If the state cannot be erased in a timely manner, then it is * safer to set the random_data parameter to &chacha_state[4] so * that this function overwrites it before returning. */ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { u8 first_block[CHACHA_BLOCK_SIZE]; BUG_ON(random_data_len > 32); chacha_init_consts(chacha_state); memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); memset(&chacha_state[12], 0, sizeof(u32) * 4); chacha20_block(chacha_state, first_block); memcpy(key, first_block, CHACHA_KEY_SIZE); memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); } /* * This function returns a ChaCha state that you may use for generating * random data. It also returns up to 32 bytes on its own of random data * that may be used; random_data_len may not be greater than 32. */ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { unsigned long flags; struct crng *crng; BUG_ON(random_data_len > 32); /* * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not * ready, we do fast key erasure with the base_crng directly, extracting * when crng_init is CRNG_EMPTY. */ if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); if (!ready) { if (crng_init == CRNG_EMPTY) extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); } spin_unlock_irqrestore(&base_crng.lock, flags); if (!ready) return; } local_lock_irqsave(&crngs.lock, flags); crng = raw_cpu_ptr(&crngs); /* * If our per-cpu crng is older than the base_crng, then it means * somebody reseeded the base_crng. In that case, we do fast key * erasure on the base_crng, and use its output as the new key * for our per-cpu crng. This brings us up to date with base_crng. */ if (unlikely(crng->generation != READ_ONCE(base_crng.generation))) { spin_lock(&base_crng.lock); crng_fast_key_erasure(base_crng.key, chacha_state, crng->key, sizeof(crng->key)); crng->generation = base_crng.generation; spin_unlock(&base_crng.lock); } /* * Finally, when we've made it this far, our per-cpu crng has an up * to date key, and we can do fast key erasure with it to produce * some random data and a ChaCha state for the caller. All other * branches of this function are "unlikely", so most of the time we * should wind up here immediately. 
*/ crng_fast_key_erasure(crng->key, chacha_state, random_data, random_data_len); local_unlock_irqrestore(&crngs.lock, flags); } static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; size_t first_block_len; if (!len) return; first_block_len = min_t(size_t, 32, len); crng_make_state(chacha_state, buf, first_block_len); len -= first_block_len; buf += first_block_len; while (len) { if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp); memcpy(buf, tmp, len); memzero_explicit(tmp, sizeof(tmp)); break; } chacha20_block(chacha_state, buf); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; len -= CHACHA_BLOCK_SIZE; buf += CHACHA_BLOCK_SIZE; } memzero_explicit(chacha_state, sizeof(chacha_state)); } /* * This returns random bytes in arbitrary quantities. The quality of the * random bytes is good as /dev/urandom. In order to ensure that the * randomness provided by this function is okay, the function * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. */ void get_random_bytes(void *buf, size_t len) { warn_unseeded_randomness(); _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(struct iov_iter *iter) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 block[CHACHA_BLOCK_SIZE]; size_t ret = 0, copied; if (unlikely(!iov_iter_count(iter))) return 0; /* * Immediately overwrite the ChaCha key at index 4 with random * bytes, in case userspace causes copy_to_iter() below to sleep * forever, so that we still retain forward secrecy in that case. */ crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); /* * However, if we're doing a read of len <= 32, we don't need to * use chacha_state after, so we can simply return those bytes to * the user directly. */ if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); goto out_zero_chacha; } for (;;) { chacha20_block(chacha_state, block); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; copied = copy_to_iter(block, sizeof(block), iter); ret += copied; if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); return ret ? ret : -EFAULT; } /* * Batched entropy returns random integers. The quality of the random * number is good as /dev/urandom. In order to ensure that the randomness * provided by this function is okay, the function wait_for_random_bytes() * should be called and return 0 at least once at any point prior. */ #define DEFINE_BATCHED_ENTROPY(type) \ struct batch_ ##type { \ /* \ * We make this 1.5x a ChaCha block, so that we get the \ * remaining 32 bytes from fast key erasure, plus one full \ * block from the detached ChaCha state. We can increase \ * the size of this later if needed so long as we keep the \ * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. 
\ */ \ type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ local_lock_t lock; \ unsigned long generation; \ unsigned int position; \ }; \ \ static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ .position = UINT_MAX \ }; \ \ type get_random_ ##type(void) \ { \ type ret; \ unsigned long flags; \ struct batch_ ##type *batch; \ unsigned long next_gen; \ \ warn_unseeded_randomness(); \ \ if (!crng_ready()) { \ _get_random_bytes(&ret, sizeof(ret)); \ return ret; \ } \ \ local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ batch = raw_cpu_ptr(&batched_entropy_##type); \ \ next_gen = READ_ONCE(base_crng.generation); \ if (batch->position >= ARRAY_SIZE(batch->entropy) || \ next_gen != batch->generation) { \ _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ batch->position = 0; \ batch->generation = next_gen; \ } \ \ ret = batch->entropy[batch->position]; \ batch->entropy[batch->position] = 0; \ ++batch->position; \ local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ return ret; \ } \ EXPORT_SYMBOL(get_random_ ##type); DEFINE_BATCHED_ENTROPY(u8) DEFINE_BATCHED_ENTROPY(u16) DEFINE_BATCHED_ENTROPY(u32) DEFINE_BATCHED_ENTROPY(u64) u32 __get_random_u32_below(u32 ceil) { /* * This is the slow path for variable ceil. It is still fast, most of * the time, by doing traditional reciprocal multiplication and * opportunistically comparing the lower half to ceil itself, before * falling back to computing a larger bound, and then rejecting samples * whose lower half would indicate a range indivisible by ceil. The use * of `-ceil % ceil` is analogous to `2^32 % ceil`, but is computable * in 32-bits. */ u32 rand = get_random_u32(); u64 mult; /* * This function is technically undefined for ceil == 0, and in fact * for the non-underscored constant version in the header, we build bug * on that. But for the non-constant case, it's convenient to have that * evaluate to being a straight call to get_random_u32(), so that * get_random_u32_inclusive() can work over its whole range without * undefined behavior. */ if (unlikely(!ceil)) return rand; mult = (u64)ceil * rand; if (unlikely((u32)mult < ceil)) { u32 bound = -ceil % ceil; while (unlikely((u32)mult < bound)) mult = (u64)ceil * get_random_u32(); } return mult >> 32; } EXPORT_SYMBOL(__get_random_u32_below); #ifdef CONFIG_SMP /* * This function is called when the CPU is coming up, with entry * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. */ int __cold random_prepare_cpu(unsigned int cpu) { /* * When the cpu comes back online, immediately invalidate both * the per-cpu crng and all batches, so that we serve fresh * randomness. */ per_cpu_ptr(&crngs, cpu)->generation = ULONG_MAX; per_cpu_ptr(&batched_entropy_u8, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u16, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u32, cpu)->position = UINT_MAX; per_cpu_ptr(&batched_entropy_u64, cpu)->position = UINT_MAX; return 0; } #endif /********************************************************************** * * Entropy accumulation and extraction routines. 
* * Callers may add entropy via: * * static void mix_pool_bytes(const void *buf, size_t len) * * After which, if added entropy should be credited: * * static void credit_init_bits(size_t bits) * * Finally, extract entropy via: * * static void extract_entropy(void *buf, size_t len) * **********************************************************************/ enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ }; static struct { struct blake2s_state hash; spinlock_t lock; unsigned int init_bits; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, BLAKE2S_IV5, BLAKE2S_IV6, BLAKE2S_IV7 }, .hash.outlen = BLAKE2S_HASH_SIZE, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; static void _mix_pool_bytes(const void *buf, size_t len) { blake2s_update(&input_pool.hash, buf, len); } /* * This function adds bytes into the input pool. It does not * update the initialization bit counter; the caller should call * credit_init_bits if this is appropriate. */ static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ static void extract_entropy(void *buf, size_t len) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; struct { unsigned long rdseed[32 / sizeof(long)]; size_t counter; } block; size_t i, longs; for (i = 0; i < ARRAY_SIZE(block.rdseed);) { longs = arch_get_random_seed_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } longs = arch_get_random_longs(&block.rdseed[i], ARRAY_SIZE(block.rdseed) - i); if (longs) { i += longs; continue; } block.rdseed[i++] = random_get_entropy(); } spin_lock_irqsave(&input_pool.lock, flags); /* seed = HASHPRF(last_key, entropy_input) */ blake2s_final(&input_pool.hash, seed); /* next_key = HASHPRF(seed, RDSEED || 0) */ block.counter = 0; blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed)); blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key)); spin_unlock_irqrestore(&input_pool.lock, flags); memzero_explicit(next_key, sizeof(next_key)); while (len) { i = min_t(size_t, len, BLAKE2S_HASH_SIZE); /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); len -= i; buf += i; } memzero_explicit(seed, sizeof(seed)); memzero_explicit(&block, sizeof(block)); } #define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) static void __cold _credit_init_bits(size_t bits) { static DECLARE_WORK(set_ready, crng_set_ready); unsigned int new, orig, add; unsigned long flags; if (!bits) return; add = min_t(size_t, bits, POOL_BITS); orig = READ_ONCE(input_pool.init_bits); do { new = min_t(unsigned int, POOL_BITS, orig + add); } while (!try_cmpxchg(&input_pool.init_bits, &orig, new)); if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. 
*/ if (static_key_initialized && system_unbound_wq) queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); #ifdef CONFIG_VDSO_GETRANDOM WRITE_ONCE(_vdso_rng_data.is_ready, true); #endif wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); pr_notice("crng init done\n"); if (urandom_warning.missed) pr_notice("%d urandom warning(s) missed due to ratelimiting\n", urandom_warning.missed); } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { spin_lock_irqsave(&base_crng.lock, flags); /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). */ if (crng_init == CRNG_EMPTY) { extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_init = CRNG_EARLY; } spin_unlock_irqrestore(&base_crng.lock, flags); } } /********************************************************************** * * Entropy collection routines. * * The following exported functions are used for pushing entropy into * the above entropy accumulation routines: * * void add_device_randomness(const void *buf, size_t len); * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); * void add_bootloader_randomness(const void *buf, size_t len); * void add_vmfork_randomness(const void *unique_vm_id, size_t len); * void add_interrupt_randomness(int irq); * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that * is likely to differ between two devices (or possibly even per boot). * This would be things like MAC addresses or serial numbers, or the * read-out of the RTC. This does *not* credit any actual entropy to * the pool, but it initializes the pool to different values for devices * that might otherwise be identical and have very little entropy * available to them (particularly common in the embedded world). * * add_hwgenerator_randomness() is for true hardware RNGs, and will credit * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. * * add_bootloader_randomness() is called by bootloader drivers, such as EFI * and device tree, and credits its input depending on whether or not the * command line option 'random.trust_bootloader'. * * add_vmfork_randomness() adds a unique (but not necessarily secret) ID * representing the current instance of a VM to the pool, without crediting, * and then force-reseeds the crng so that it takes effect immediately. * * add_interrupt_randomness() uses the interrupt timing as random * inputs to the entropy pool. Using the cycle counters and the irq source * as inputs, it feeds the input pool roughly once a second or after 64 * interrupts, crediting 1 bit of entropy for whichever comes first. * * add_input_randomness() uses the input layer interrupt timing, as well * as the event type information from the hardware. * * add_disk_randomness() uses what amounts to the seek time of block * layer request events, on a per-disk_devt basis, as input to the * entropy pool. Note that high-speed solid state drives with very low * seek times do not make for good sources of entropy, as their seek * times are usually fairly consistent. * * The last two routines try to estimate how many bits of entropy * to credit. They do this by keeping track of the first and second * order deltas of the event timings. 
* **********************************************************************/ static bool trust_cpu __initdata = true; static bool trust_bootloader __initdata = true; static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); } static int __init parse_trust_bootloader(char *arg) { return kstrtobool(arg, &trust_bootloader); } early_param("random.trust_cpu", parse_trust_cpu); early_param("random.trust_bootloader", parse_trust_bootloader); static int random_pm_notification(struct notifier_block *nb, unsigned long action, void *data) { unsigned long flags, entropy = random_get_entropy(); /* * Encode a representation of how long the system has been suspended, * in a way that is distinct from prior system suspends. */ ktime_t stamps[] = { ktime_get(), ktime_get_boottime(), ktime_get_real() }; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&action, sizeof(action)); _mix_pool_bytes(stamps, sizeof(stamps)); _mix_pool_bytes(&entropy, sizeof(entropy)); spin_unlock_irqrestore(&input_pool.lock, flags); if (crng_ready() && (action == PM_RESTORE_PREPARE || (action == PM_POST_SUSPEND && !IS_ENABLED(CONFIG_PM_AUTOSLEEP) && !IS_ENABLED(CONFIG_PM_USERSPACE_AUTOSLEEP)))) { crng_reseed(NULL); pr_notice("crng reseeded on system resumption\n"); } return 0; } static struct notifier_block pm_notifier = { .notifier_call = random_pm_notification }; /* * This is called extremely early, before time keeping functionality is * available, but arch randomness is. Interrupts are not yet enabled. */ void __init random_init_early(const char *command_line) { unsigned long entropy[BLAKE2S_BLOCK_SIZE / sizeof(long)]; size_t i, longs, arch_bits; #if defined(LATENT_ENTROPY_PLUGIN) static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif for (i = 0, arch_bits = sizeof(entropy) * 8; i < ARRAY_SIZE(entropy);) { longs = arch_get_random_seed_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } longs = arch_get_random_longs(entropy, ARRAY_SIZE(entropy) - i); if (longs) { _mix_pool_bytes(entropy, sizeof(*entropy) * longs); i += longs; continue; } arch_bits -= sizeof(*entropy) * 8; ++i; } _mix_pool_bytes(init_utsname(), sizeof(*(init_utsname()))); _mix_pool_bytes(command_line, strlen(command_line)); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); else if (trust_cpu) _credit_init_bits(arch_bits); } /* * This is called a little bit after the prior function, and now there is * access to timestamps counters. Interrupts are not yet enabled. */ void __init random_init(void) { unsigned long entropy = random_get_entropy(); ktime_t now = ktime_get_real(); _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(&entropy, sizeof(entropy)); add_latent_entropy(); /* * If we were initialized by the cpu or bootloader before jump labels * or workqueues are initialized, then we should enable the static * branch here, where it's guaranteed that these have been initialized. */ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) crng_set_ready(NULL); /* Reseed if already seeded by earlier phases. */ if (crng_ready()) crng_reseed(NULL); WARN_ON(register_pm_notifier(&pm_notifier)); WARN(!entropy, "Missing cycle counter and fallback timer; RNG " "entropy collection will consequently suffer."); } /* * Add device- or boot-specific data to the input pool to help * initialize it. 
* * None of this adds any entropy; it is meant to avoid the problem of * the entropy pool having similar initial state across largely * identical devices. */ void add_device_randomness(const void *buf, size_t len) { unsigned long entropy = random_get_entropy(); unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); /* * Interface for in-kernel drivers of true hardware RNGs. Those devices * may produce endless random bits, so this function will sleep for * some amount of time after, if the sleep_after parameter is true. */ void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after) { mix_pool_bytes(buf, len); credit_init_bits(entropy); /* * Throttle writing to once every reseed interval, unless we're not yet * initialized or no entropy is credited. */ if (sleep_after && !kthread_should_stop() && (crng_ready() || !entropy)) schedule_timeout_interruptible(crng_reseed_interval()); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* * Handle random seed passed by bootloader, and credit it depending * on the command line option 'random.trust_bootloader'. */ void __init add_bootloader_randomness(const void *buf, size_t len) { mix_pool_bytes(buf, len); if (trust_bootloader) credit_init_bits(len * 8); } #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); /* * Handle a new unique VM ID, which is unique, not secret, so we * don't credit it, but we do immediately force a reseed after so * that it's used by the crng posthaste. */ void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len) { add_device_randomness(unique_vm_id, len); if (crng_ready()) { crng_reseed(NULL); pr_notice("crng reseeded due to virtual machine fork\n"); } blocking_notifier_call_chain(&vmfork_chain, 0, NULL); } #if IS_MODULE(CONFIG_VMGENID) EXPORT_SYMBOL_GPL(add_vmfork_randomness); #endif int __cold register_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); int __cold unregister_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier); #endif struct fast_pool { unsigned long pool[4]; unsigned long last; unsigned int count; struct timer_list mix; }; static void mix_interrupt_randomness(struct timer_list *work); static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { #ifdef CONFIG_64BIT #define FASTMIX_PERM SIPHASH_PERMUTATION .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 }, #else #define FASTMIX_PERM HSIPHASH_PERMUTATION .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 }, #endif .mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0) }; /* * This is [Half]SipHash-1-x, starting from an empty key. Because * the key is fixed, it assumes that its inputs are non-malicious, * and therefore this has no security on its own. s represents the * four-word SipHash state, while v represents a two-word input. 
*/ static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { s[3] ^= v1; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v1; s[3] ^= v2; FASTMIX_PERM(s[0], s[1], s[2], s[3]); s[0] ^= v2; } #ifdef CONFIG_SMP /* * This function is called when the CPU has just come online, with * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. */ int __cold random_online_cpu(unsigned int cpu) { /* * During CPU shutdown and before CPU onlining, add_interrupt_ * randomness() may schedule mix_interrupt_randomness(), and * set the MIX_INFLIGHT flag. However, because the worker can * be scheduled on a different CPU during this period, that * flag will never be cleared. For that reason, we zero out * the flag here, which runs just after workqueues are onlined * for the CPU again. This also has the effect of setting the * irq randomness count to zero so that new accumulated irqs * are fresh. */ per_cpu_ptr(&irq_randomness, cpu)->count = 0; return 0; } #endif static void mix_interrupt_randomness(struct timer_list *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* * The size of the copied stack pool is explicitly 2 longs so that we * only ever ingest half of the siphash output each time, retaining * the other half as the next "key" that carries over. The entropy is * supposed to be sufficiently dispersed between bits so on average * we don't wind up "losing" some. */ unsigned long pool[2]; unsigned int count; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); if (fast_pool != this_cpu_ptr(&irq_randomness)) { local_irq_enable(); return; } /* * Copy the pool to the stack so that the mixer always has a * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool, sizeof(pool)); count = fast_pool->count; fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); mix_pool_bytes(pool, sizeof(pool)); credit_init_bits(clamp_t(unsigned int, (count & U16_MAX) / 64, 1, sizeof(pool) * 8)); memzero_explicit(pool, sizeof(pool)); } void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; unsigned long entropy = random_get_entropy(); struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; fast_mix(fast_pool->pool, entropy, (regs ? instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) return; if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ)) return; fast_pool->count |= MIX_INFLIGHT; if (!timer_pending(&fast_pool->mix)) { fast_pool->mix.expires = jiffies; add_timer_on(&fast_pool->mix, raw_smp_processor_id()); } } EXPORT_SYMBOL_GPL(add_interrupt_randomness); /* There is one of these per entropy source */ struct timer_rand_state { unsigned long last_time; long last_delta, last_delta2; }; /* * This function adds entropy to the entropy "pool" by using timing * delays. It uses the timer_rand_state structure to make an estimate * of how many bits of entropy this call has added to the pool. The * value "num" is also added to the pool; it should somehow describe * the type of event that just happened. */ static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) { unsigned long entropy = random_get_entropy(), now = jiffies, flags; long delta, delta2, delta3; unsigned int bits; /* * If we're in a hard IRQ, add_interrupt_randomness() will be called * sometime after, so mix into the fast pool. 
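 *
 * (add_interrupt_randomness() credits roughly one bit per 64 accumulated
 * events once the fast pool is drained into the input pool; for example,
 * 256 accumulated interrupts are credited as 256 / 64 = 4 bits, clamped
 * to the size in bits of the drained pool.)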
*/ if (in_hardirq()) { fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); } else { spin_lock_irqsave(&input_pool.lock, flags); _mix_pool_bytes(&entropy, sizeof(entropy)); _mix_pool_bytes(&num, sizeof(num)); spin_unlock_irqrestore(&input_pool.lock, flags); } if (crng_ready()) return; /* * Calculate number of bits of randomness we probably added. * We take into account the first, second and third-order deltas * in order to make our estimate. */ delta = now - READ_ONCE(state->last_time); WRITE_ONCE(state->last_time, now); delta2 = delta - READ_ONCE(state->last_delta); WRITE_ONCE(state->last_delta, delta); delta3 = delta2 - READ_ONCE(state->last_delta2); WRITE_ONCE(state->last_delta2, delta2); if (delta < 0) delta = -delta; if (delta2 < 0) delta2 = -delta2; if (delta3 < 0) delta3 = -delta3; if (delta > delta2) delta = delta2; if (delta > delta3) delta = delta3; /* * delta is now minimum absolute delta. Round down by 1 bit * on general principles, and limit entropy estimate to 11 bits. */ bits = min(fls(delta >> 1), 11); /* * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() * will run after this, which uses a different crediting scheme of 1 bit * per every 64 interrupts. In order to let that function do accounting * close to the one in this function, we credit a full 64/64 bit per bit, * and then subtract one to account for the extra one added. */ if (in_hardirq()) this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; else _credit_init_bits(bits); } void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) { static unsigned char last_value; static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; /* Ignore autorepeat and the like. */ if (value == last_value) return; last_value = value; add_timer_randomness(&input_timer_state, (type << 4) ^ code ^ (code >> 4) ^ value); } EXPORT_SYMBOL_GPL(add_input_randomness); #ifdef CONFIG_BLOCK void add_disk_randomness(struct gendisk *disk) { if (!disk || !disk->random) return; /* First major is 1, so we get >= 0x200 here. */ add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } EXPORT_SYMBOL_GPL(add_disk_randomness); void __cold rand_initialize_disk(struct gendisk *disk) { struct timer_rand_state *state; /* * If kzalloc returns null, we just won't use that entropy * source. */ state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); if (state) { state->last_time = INITIAL_JIFFIES; disk->random = state; } } #endif struct entropy_timer_state { unsigned long entropy; struct timer_list timer; atomic_t samples; unsigned int samples_per_bit; }; /* * Each time the timer fires, we expect that we got an unpredictable jump in * the cycle counter. Even if the timer is running on another CPU, the timer * activity will be touching the stack of the CPU that is generating entropy. * * Note that we don't re-arm the timer in the timer itself - we are happy to be * scheduled away, since that just makes the load more complex, but we do not * want the timer to keep ticking unless the entropy loop is running. * * So the re-arming always happens in the entropy loop itself. 
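 *
 * As a rough worked example of the calibration below: out of 8192 trial
 * reads, if the cycle counter changed on about half of them,
 * samples_per_bit comes out near 2, and one bit is credited for roughly
 * every second timer firing.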
*/ static void __cold entropy_timer(struct timer_list *timer) { struct entropy_timer_state *state = container_of(timer, struct entropy_timer_state, timer); unsigned long entropy = random_get_entropy(); mix_pool_bytes(&entropy, sizeof(entropy)); if (atomic_inc_return(&state->samples) % state->samples_per_bit == 0) credit_init_bits(1); } /* * If we have an actual cycle counter, see if we can generate enough entropy * with timing noise. */ static void __cold try_to_generate_entropy(void) { enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 15 }; u8 stack_bytes[sizeof(struct entropy_timer_state) + SMP_CACHE_BYTES - 1]; struct entropy_timer_state *stack = PTR_ALIGN((void *)stack_bytes, SMP_CACHE_BYTES); unsigned int i, num_different = 0; unsigned long last = random_get_entropy(); int cpu = -1; for (i = 0; i < NUM_TRIAL_SAMPLES - 1; ++i) { stack->entropy = random_get_entropy(); if (stack->entropy != last) ++num_different; last = stack->entropy; } stack->samples_per_bit = DIV_ROUND_UP(NUM_TRIAL_SAMPLES, num_different + 1); if (stack->samples_per_bit > MAX_SAMPLES_PER_BIT) return; atomic_set(&stack->samples, 0); timer_setup_on_stack(&stack->timer, entropy_timer, 0); while (!crng_ready() && !signal_pending(current)) { /* * Check !timer_pending() and then ensure that any previous callback has finished * executing by checking try_to_del_timer_sync(), before queueing the next one. */ if (!timer_pending(&stack->timer) && try_to_del_timer_sync(&stack->timer) >= 0) { struct cpumask timer_cpus; unsigned int num_cpus; /* * Preemption must be disabled here, both to read the current CPU number * and to avoid scheduling a timer on a dead CPU. */ preempt_disable(); /* Only schedule callbacks on timer CPUs that are online. */ cpumask_and(&timer_cpus, housekeeping_cpumask(HK_TYPE_TIMER), cpu_online_mask); num_cpus = cpumask_weight(&timer_cpus); /* In very bizarre case of misconfiguration, fallback to all online. */ if (unlikely(num_cpus == 0)) { timer_cpus = *cpu_online_mask; num_cpus = cpumask_weight(&timer_cpus); } /* Basic CPU round-robin, which avoids the current CPU. */ do { cpu = cpumask_next(cpu, &timer_cpus); if (cpu >= nr_cpu_ids) cpu = cpumask_first(&timer_cpus); } while (cpu == smp_processor_id() && num_cpus > 1); /* Expiring the timer at `jiffies` means it's the next tick. */ stack->timer.expires = jiffies; add_timer_on(&stack->timer, cpu); preempt_enable(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); schedule(); stack->entropy = random_get_entropy(); } mix_pool_bytes(&stack->entropy, sizeof(stack->entropy)); del_timer_sync(&stack->timer); destroy_timer_on_stack(&stack->timer); } /********************************************************************** * * Userspace reader/writer interfaces. * * getrandom(2) is the primary modern interface into the RNG and should * be used in preference to anything else. * * Reading from /dev/random has the same functionality as calling * getrandom(2) with flags=0. In earlier versions, however, it had * vastly different semantics and should therefore be avoided, to * prevent backwards compatibility issues. * * Reading from /dev/urandom has the same functionality as calling * getrandom(2) with flags=GRND_INSECURE. Because it does not block * waiting for the RNG to be ready, it should not be used. * * Writing to either /dev/random or /dev/urandom adds entropy to * the input pool but does not credit it. * * Polling on /dev/random indicates when the RNG is initialized, on * the read side, and when it wants new entropy, on the write side. 
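 *
 * A minimal userspace consumer (illustrative, not part of this file) only
 * needs getrandom(2):
 *
 *	#include <err.h>
 *	#include <sys/random.h>
 *
 *	unsigned char buf[32];
 *
 *	if (getrandom(buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
 *		err(1, "getrandom");
 *
 * This blocks until the RNG is initialized and, for a small request like
 * this, then returns the full amount.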
* * Both /dev/random and /dev/urandom have the same set of ioctls for * adding entropy, getting the entropy count, zeroing the count, and * reseeding the crng. * **********************************************************************/ SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { struct iov_iter iter; int ret; if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; /* * Requesting insecure and blocking randomness at the same time makes * no sense. */ if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; if (!crng_ready() && !(flags & GRND_INSECURE)) { if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); if (unlikely(ret)) return ret; } ret = import_ubuf(ITER_DEST, ubuf, len, &iter); if (unlikely(ret)) return ret; return get_random_bytes_user(&iter); } static __poll_t random_poll(struct file *file, poll_table *wait) { poll_wait(file, &crng_init_wait, wait); return crng_ready() ? EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } static ssize_t write_pool_user(struct iov_iter *iter) { u8 block[BLAKE2S_BLOCK_SIZE]; ssize_t ret = 0; size_t copied; if (unlikely(!iov_iter_count(iter))) return 0; for (;;) { copied = copy_from_iter(block, sizeof(block), iter); ret += copied; mix_pool_bytes(block, copied); if (!iov_iter_count(iter) || copied != sizeof(block)) break; BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; cond_resched(); } } memzero_explicit(block, sizeof(block)); return ret ? ret : -EFAULT; } static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { return write_pool_user(iter); } static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { static int maxwarn = 10; /* * Opportunistically attempt to initialize the RNG on platforms that * have fast cycle counters, but don't (for now) require it to succeed. */ if (!crng_ready()) try_to_generate_entropy(); if (!crng_ready()) { if (!ratelimit_disable && maxwarn <= 0) ++urandom_warning.missed; else if (ratelimit_disable || __ratelimit(&urandom_warning)) { --maxwarn; pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", current->comm, iov_iter_count(iter)); } } return get_random_bytes_user(iter); } static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { int ret; if (!crng_ready() && ((kiocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO)) || (kiocb->ki_filp->f_flags & O_NONBLOCK))) return -EAGAIN; ret = wait_for_random_bytes(); if (ret != 0) return ret; return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int __user *p = (int __user *)arg; int ent_count; switch (cmd) { case RNDGETENTCNT: /* Inherently racy, no point locking. */ if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; if (ent_count < 0) return -EINVAL; credit_init_bits(ent_count); return 0; case RNDADDENTROPY: { struct iov_iter iter; ssize_t ret; int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; if (get_user(len, p++)) return -EFAULT; ret = import_ubuf(ITER_SOURCE, p, len, &iter); if (unlikely(ret)) return ret; ret = write_pool_user(&iter); if (unlikely(ret < 0)) return ret; /* Since we're crediting, enforce that it was all written into the pool. 
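 *
 * (Userspace reaches this path with ioctl(fd, RNDADDENTROPY, p), where p
 * points at a struct rand_pool_info: an entropy count in bits, a byte
 * count, and then that many bytes of seed data.)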
*/ if (unlikely(ret != len)) return -EFAULT; credit_init_bits(ent_count); return 0; } case RNDZAPENTCNT: case RNDCLEARPOOL: /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!crng_ready()) return -ENODATA; crng_reseed(NULL); return 0; default: return -EINVAL; } } static int random_fasync(int fd, struct file *filp, int on) { return fasync_helper(fd, filp, on, &fasync); } const struct file_operations random_fops = { .read_iter = random_read_iter, .write_iter = random_write_iter, .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { .read_iter = urandom_read_iter, .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, .splice_read = copy_splice_read, .splice_write = iter_file_splice_write, }; /******************************************************************** * * Sysctl interface. * * These are partly unused legacy knobs with dummy values to not break * userspace and partly still useful things. They are usually accessible * in /proc/sys/kernel/random/ and are as follows: * * - boot_id - a UUID representing the current boot. * * - uuid - a random UUID, different each time the file is read. * * - poolsize - the number of bits of entropy that the input pool can * hold, tied to the POOL_BITS constant. * * - entropy_avail - the number of bits of entropy currently in the * input pool. Always <= poolsize. * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting * more entropy, tied to the POOL_READY_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * * - urandom_min_reseed_secs - fixed to the value CRNG_RESEED_INTERVAL. * It is writable to avoid breaking old userspaces, but writing * to it does not change any behavior of the RNG. * ********************************************************************/ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; /* * This function is used to return both the bootid UUID, and random * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. */ static int proc_do_uuid(const struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { u8 tmp_uuid[UUID_SIZE], *uuid; char uuid_string[UUID_STRING_LEN + 1]; struct ctl_table fake_table = { .data = uuid_string, .maxlen = UUID_STRING_LEN }; if (write) return -EPERM; uuid = table->data; if (!uuid) { uuid = tmp_uuid; generate_random_uuid(uuid); } else { static DEFINE_SPINLOCK(bootid_spinlock); spin_lock(&bootid_spinlock); if (!uuid[8]) generate_random_uuid(uuid); spin_unlock(&bootid_spinlock); } snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); return proc_dostring(&fake_table, 0, buf, lenp, ppos); } /* The same as proc_dointvec, but writes don't change anything. 
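 *
 * For example, writing to /proc/sys/kernel/random/write_wakeup_threshold
 * appears to succeed but changes nothing, while reads keep returning the
 * compiled-in default.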
*/ static int proc_do_rointvec(const struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } static struct ctl_table random_table[] = { { .procname = "poolsize", .data = &sysctl_poolsize, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "entropy_avail", .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, { .procname = "write_wakeup_threshold", .data = &sysctl_random_write_wakeup_bits, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "urandom_min_reseed_secs", .data = &sysctl_random_min_urandom_seed, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_rointvec, }, { .procname = "boot_id", .data = &sysctl_bootid, .mode = 0444, .proc_handler = proc_do_uuid, }, { .procname = "uuid", .mode = 0444, .proc_handler = proc_do_uuid, }, }; /* * random_init() is called before sysctl_init(), * so we cannot call register_sysctl_init() in random_init() */ static int __init random_sysctls_init(void) { register_sysctl_init("kernel/random", random_table); return 0; } device_initcall(random_sysctls_init); #endif
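/*
 * Illustrative userspace sketch (not part of the kernel sources above): it
 * exercises the interfaces documented in this file by waiting for the RNG
 * via poll(2) on /dev/random, reading the entropy_avail sysctl, and then
 * fetching bytes with getrandom(2). The paths and limits match the code
 * above; everything else is example scaffolding only.
 */
#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <sys/random.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd = { .events = POLLIN };
	char avail[16];
	unsigned char buf[32];
	ssize_t n;
	int fd;

	/* POLLIN on /dev/random is reported once crng initialization is done. */
	pfd.fd = open("/dev/random", O_RDONLY);
	if (pfd.fd < 0 || poll(&pfd, 1, -1) < 0)
		err(1, "/dev/random");

	/* entropy_avail mirrors input_pool.init_bits (0..256). */
	fd = open("/proc/sys/kernel/random/entropy_avail", O_RDONLY);
	if (fd >= 0) {
		n = read(fd, avail, sizeof(avail) - 1);
		if (n > 0) {
			avail[n] = '\0';
			printf("entropy_avail: %s", avail);
		}
		close(fd);
	}

	/* Equivalent to reading /dev/random now that the pool is ready. */
	if (getrandom(buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
		err(1, "getrandom");

	close(pfd.fd);
	return 0;
}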
// SPDX-License-Identifier: GPL-2.0
/*
 * scsi_scan.c
 *
 * Copyright (C) 2000 Eric Youngdale,
 * Copyright (C) 2002 Patrick Mansfield
 *
 * The general scanning/probing algorithm is as follows, exceptions are
 * made to it depending on device specific flags, compilation options, and
 * global variable (boot or module load time) settings.
 *
 * A specific LUN is scanned via an INQUIRY command; if the LUN has a
 * device attached, a scsi_device is allocated and setup for it.
 *
 * For every id of every channel on the given host:
 *
 *	Scan LUN 0; if the target responds to LUN 0 (even if there is no
 *	device or storage attached to LUN 0):
 *
 *		If LUN 0 has a device attached, allocate and setup a
 *		scsi_device for it.
 *
 *		If target is SCSI-3 or up, issue a REPORT LUN, and scan
 *		all of the LUNs returned by the REPORT LUN; else,
 *		sequentially scan LUNs up until some maximum is reached,
 *		or a LUN is seen that cannot have a device attached to it.
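 *
 * In rough pseudo-code (an illustrative condensation, not literal code
 * from this file):
 *
 *	for each <channel, id> on the host:
 *		probe LUN 0 with INQUIRY
 *		if the target did not respond at all:
 *			continue
 *		if the target reports SCSI-3 or higher:
 *			issue REPORT LUNS and scan each LUN returned
 *		else:
 *			scan LUNs sequentially up to the configured maximum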
*/ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/spinlock.h> #include <linux/async.h> #include <linux/slab.h> #include <asm/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_devinfo.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_dh.h> #include <scsi/scsi_eh.h> #include "scsi_priv.h" #include "scsi_logging.h" #define ALLOC_FAILURE_MSG KERN_ERR "%s: Allocation failure during" \ " SCSI scanning, some SCSI devices might not be configured\n" /* * Default timeout */ #define SCSI_TIMEOUT (2*HZ) #define SCSI_REPORT_LUNS_TIMEOUT (30*HZ) /* * Prefix values for the SCSI id's (stored in sysfs name field) */ #define SCSI_UID_SER_NUM 'S' #define SCSI_UID_UNKNOWN 'Z' /* * Return values of some of the scanning functions. * * SCSI_SCAN_NO_RESPONSE: no valid response received from the target, this * includes allocation or general failures preventing IO from being sent. * * SCSI_SCAN_TARGET_PRESENT: target responded, but no device is available * on the given LUN. * * SCSI_SCAN_LUN_PRESENT: target responded, and a device is available on a * given LUN. */ #define SCSI_SCAN_NO_RESPONSE 0 #define SCSI_SCAN_TARGET_PRESENT 1 #define SCSI_SCAN_LUN_PRESENT 2 static const char *scsi_null_device_strs = "nullnullnullnull"; #define MAX_SCSI_LUNS 512 static u64 max_scsi_luns = MAX_SCSI_LUNS; module_param_named(max_luns, max_scsi_luns, ullong, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(max_luns, "last scsi LUN (should be between 1 and 2^64-1)"); #ifdef CONFIG_SCSI_SCAN_ASYNC #define SCSI_SCAN_TYPE_DEFAULT "async" #else #define SCSI_SCAN_TYPE_DEFAULT "sync" #endif static char scsi_scan_type[7] = SCSI_SCAN_TYPE_DEFAULT; module_param_string(scan, scsi_scan_type, sizeof(scsi_scan_type), S_IRUGO|S_IWUSR); MODULE_PARM_DESC(scan, "sync, async, manual, or none. " "Setting to 'manual' disables automatic scanning, but allows " "for manual device scan via the 'scan' sysfs attribute."); static unsigned int scsi_inq_timeout = SCSI_TIMEOUT/HZ + 18; module_param_named(inq_timeout, scsi_inq_timeout, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(inq_timeout, "Timeout (in seconds) waiting for devices to answer INQUIRY." " Default is 20. Some devices may need more; most need less."); /* This lock protects only this list */ static DEFINE_SPINLOCK(async_scan_lock); static LIST_HEAD(scanning_hosts); struct async_scan_data { struct list_head list; struct Scsi_Host *shost; struct completion prev_finished; }; /* * scsi_enable_async_suspend - Enable async suspend and resume */ void scsi_enable_async_suspend(struct device *dev) { /* * If a user has disabled async probing a likely reason is due to a * storage enclosure that does not inject staggered spin-ups. For * safety, make resume synchronous as well in that case. */ if (strncmp(scsi_scan_type, "async", 5) != 0) return; /* Enable asynchronous suspend and resume. */ device_enable_async_suspend(dev); } /** * scsi_complete_async_scans - Wait for asynchronous scans to complete * * When this function returns, any host which started scanning before * this function was called will have finished its scan. Hosts which * started scanning after this function was called may or may not have * finished. 
*/ int scsi_complete_async_scans(void) { struct async_scan_data *data; do { if (list_empty(&scanning_hosts)) return 0; /* If we can't get memory immediately, that's OK. Just * sleep a little. Even if we never get memory, the async * scans will finish eventually. */ data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) msleep(1); } while (!data); data->shost = NULL; init_completion(&data->prev_finished); spin_lock(&async_scan_lock); /* Check that there's still somebody else on the list */ if (list_empty(&scanning_hosts)) goto done; list_add_tail(&data->list, &scanning_hosts); spin_unlock(&async_scan_lock); printk(KERN_INFO "scsi: waiting for bus probes to complete ...\n"); wait_for_completion(&data->prev_finished); spin_lock(&async_scan_lock); list_del(&data->list); if (!list_empty(&scanning_hosts)) { struct async_scan_data *next = list_entry(scanning_hosts.next, struct async_scan_data, list); complete(&next->prev_finished); } done: spin_unlock(&async_scan_lock); kfree(data); return 0; } /** * scsi_unlock_floptical - unlock device via a special MODE SENSE command * @sdev: scsi device to send command to * @result: area to store the result of the MODE SENSE * * Description: * Send a vendor specific MODE SENSE (not a MODE SELECT) command. * Called for BLIST_KEY devices. **/ static void scsi_unlock_floptical(struct scsi_device *sdev, unsigned char *result) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; sdev_printk(KERN_NOTICE, sdev, "unlocking floptical drive\n"); scsi_cmd[0] = MODE_SENSE; scsi_cmd[1] = 0; scsi_cmd[2] = 0x2e; scsi_cmd[3] = 0; scsi_cmd[4] = 0x2a; /* size */ scsi_cmd[5] = 0; scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, result, 0x2a, SCSI_TIMEOUT, 3, NULL); } static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, unsigned int depth) { int new_shift = sbitmap_calculate_shift(depth); bool need_alloc = !sdev->budget_map.map; bool need_free = false; int ret; struct sbitmap sb_backup; depth = min_t(unsigned int, depth, scsi_device_max_queue_depth(sdev)); /* * realloc if new shift is calculated, which is caused by setting * up one new default queue depth after calling ->device_configure */ if (!need_alloc && new_shift != sdev->budget_map.shift) need_alloc = need_free = true; if (!need_alloc) return 0; /* * Request queue has to be frozen for reallocating budget map, * and here disk isn't added yet, so freezing is pretty fast */ if (need_free) { blk_mq_freeze_queue(sdev->request_queue); sb_backup = sdev->budget_map; } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), new_shift, GFP_KERNEL, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); if (need_free) { if (ret) sdev->budget_map = sb_backup; else sbitmap_free(&sb_backup); ret = 0; blk_mq_unfreeze_queue(sdev->request_queue); } return ret; } /** * scsi_alloc_sdev - allocate and setup a scsi_Device * @starget: which target to allocate a &scsi_device for * @lun: which lun * @hostdata: usually NULL and set by ->slave_alloc instead * * Description: * Allocate, initialize for io, and return a pointer to a scsi_Device. * Stores the @shost, @channel, @id, and @lun in the scsi_Device, and * adds scsi_Device to the appropriate list. * * Return value: * scsi_Device pointer, or NULL on failure. 
**/ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, u64 lun, void *hostdata) { unsigned int depth; struct scsi_device *sdev; struct request_queue *q; int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct queue_limits lim; sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_KERNEL); if (!sdev) goto out; sdev->vendor = scsi_null_device_strs; sdev->model = scsi_null_device_strs; sdev->rev = scsi_null_device_strs; sdev->host = shost; sdev->queue_ramp_up_period = SCSI_DEFAULT_RAMP_UP_PERIOD; sdev->id = starget->id; sdev->lun = lun; sdev->channel = starget->channel; mutex_init(&sdev->state_mutex); sdev->sdev_state = SDEV_CREATED; INIT_LIST_HEAD(&sdev->siblings); INIT_LIST_HEAD(&sdev->same_target_siblings); INIT_LIST_HEAD(&sdev->starved_entry); INIT_LIST_HEAD(&sdev->event_list); spin_lock_init(&sdev->list_lock); mutex_init(&sdev->inquiry_mutex); INIT_WORK(&sdev->event_work, scsi_evt_thread); INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue); sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->sdev_target = starget; /* usually NULL and set by ->slave_alloc instead */ sdev->hostdata = hostdata; /* if the device needs this changing, it may do so in the * slave_configure function */ sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED; /* * Some low level driver could use device->type */ sdev->type = -1; /* * Assume that the device will have handshaking problems, * and then fix this field later if it turns out it * doesn't */ sdev->borken = 1; sdev->sg_reserved_size = INT_MAX; scsi_init_limits(shost, &lim); q = blk_mq_alloc_queue(&sdev->host->tag_set, &lim, sdev); if (IS_ERR(q)) { /* release fn is set up in scsi_sysfs_device_initialise, so * have to free and put manually here */ put_device(&starget->dev); kfree(sdev); goto out; } kref_get(&sdev->host->tagset_refcnt); sdev->request_queue = q; depth = sdev->host->cmd_per_lun ?: 1; /* * Use .can_queue as budget map's depth because we have to * support adjusting queue depth from sysfs. Meantime use * default device queue depth to figure out sbitmap shift * since we use this queue depth most of times. 
*/ if (scsi_realloc_sdev_budget_map(sdev, depth)) { put_device(&starget->dev); kfree(sdev); goto out; } scsi_change_queue_depth(sdev, depth); scsi_sysfs_device_initialize(sdev); if (shost->hostt->slave_alloc) { ret = shost->hostt->slave_alloc(sdev); if (ret) { /* * if LLDD reports slave not present, don't clutter * console with alloc failure messages */ if (ret == -ENXIO) display_failure_msg = 0; goto out_device_destroy; } } return sdev; out_device_destroy: __scsi_remove_device(sdev); out: if (display_failure_msg) printk(ALLOC_FAILURE_MSG, __func__); return NULL; } static void scsi_target_destroy(struct scsi_target *starget) { struct device *dev = &starget->dev; struct Scsi_Host *shost = dev_to_shost(dev->parent); unsigned long flags; BUG_ON(starget->state == STARGET_DEL); starget->state = STARGET_DEL; transport_destroy_device(dev); spin_lock_irqsave(shost->host_lock, flags); if (shost->hostt->target_destroy) shost->hostt->target_destroy(starget); list_del_init(&starget->siblings); spin_unlock_irqrestore(shost->host_lock, flags); put_device(dev); } static void scsi_target_dev_release(struct device *dev) { struct device *parent = dev->parent; struct scsi_target *starget = to_scsi_target(dev); kfree(starget); put_device(parent); } static const struct device_type scsi_target_type = { .name = "scsi_target", .release = scsi_target_dev_release, }; int scsi_is_target_device(const struct device *dev) { return dev->type == &scsi_target_type; } EXPORT_SYMBOL(scsi_is_target_device); static struct scsi_target *__scsi_find_target(struct device *parent, int channel, uint id) { struct scsi_target *starget, *found_starget = NULL; struct Scsi_Host *shost = dev_to_shost(parent); /* * Search for an existing target for this sdev. */ list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->id == id && starget->channel == channel) { found_starget = starget; break; } } if (found_starget) get_device(&found_starget->dev); return found_starget; } /** * scsi_target_reap_ref_release - remove target from visibility * @kref: the reap_ref in the target being released * * Called on last put of reap_ref, which is the indication that no device * under this target is visible anymore, so render the target invisible in * sysfs. Note: we have to be in user context here because the target reaps * should be done in places where the scsi device visibility is being removed. */ static void scsi_target_reap_ref_release(struct kref *kref) { struct scsi_target *starget = container_of(kref, struct scsi_target, reap_ref); /* * if we get here and the target is still in a CREATED state that * means it was allocated but never made visible (because a scan * turned up no LUNs), so don't call device_del() on it. */ if ((starget->state != STARGET_CREATED) && (starget->state != STARGET_CREATED_REMOVE)) { transport_remove_device(&starget->dev); device_del(&starget->dev); } scsi_target_destroy(starget); } static void scsi_target_reap_ref_put(struct scsi_target *starget) { kref_put(&starget->reap_ref, scsi_target_reap_ref_release); } /** * scsi_alloc_target - allocate a new or find an existing target * @parent: parent of the target (need not be a scsi host) * @channel: target channel number (zero if no channels) * @id: target id number * * Return an existing target if one exists, provided it hasn't already * gone into STARGET_DEL state, otherwise allocate a new target. 
* * The target is returned with an incremented reference, so the caller * is responsible for both reaping and doing a last put */ static struct scsi_target *scsi_alloc_target(struct device *parent, int channel, uint id) { struct Scsi_Host *shost = dev_to_shost(parent); struct device *dev = NULL; unsigned long flags; const int size = sizeof(struct scsi_target) + shost->transportt->target_size; struct scsi_target *starget; struct scsi_target *found_target; int error, ref_got; starget = kzalloc(size, GFP_KERNEL); if (!starget) { printk(KERN_ERR "%s: allocation failure\n", __func__); return NULL; } dev = &starget->dev; device_initialize(dev); kref_init(&starget->reap_ref); dev->parent = get_device(parent); dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id); dev->bus = &scsi_bus_type; dev->type = &scsi_target_type; scsi_enable_async_suspend(dev); starget->id = id; starget->channel = channel; starget->can_queue = 0; INIT_LIST_HEAD(&starget->siblings); INIT_LIST_HEAD(&starget->devices); starget->state = STARGET_CREATED; starget->scsi_level = SCSI_2; starget->max_target_blocked = SCSI_DEFAULT_TARGET_BLOCKED; retry: spin_lock_irqsave(shost->host_lock, flags); found_target = __scsi_find_target(parent, channel, id); if (found_target) goto found; list_add_tail(&starget->siblings, &shost->__targets); spin_unlock_irqrestore(shost->host_lock, flags); /* allocate and add */ transport_setup_device(dev); if (shost->hostt->target_alloc) { error = shost->hostt->target_alloc(starget); if(error) { if (error != -ENXIO) dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); return NULL; } } get_device(dev); return starget; found: /* * release routine already fired if kref is zero, so if we can still * take the reference, the target must be alive. If we can't, it must * be dying and we need to wait for a new target */ ref_got = kref_get_unless_zero(&found_target->reap_ref); spin_unlock_irqrestore(shost->host_lock, flags); if (ref_got) { put_device(dev); return found_target; } /* * Unfortunately, we found a dying target; need to wait until it's * dead before we can get a new one. There is an anomaly here. We * *should* call scsi_target_reap() to balance the kref_get() of the * reap_ref above. However, since the target being released, it's * already invisible and the reap_ref is irrelevant. If we call * scsi_target_reap() we might spuriously do another device_del() on * an already invisible target. */ put_device(&found_target->dev); /* * length of time is irrelevant here, we just want to yield the CPU * for a tick to avoid busy waiting for the target to die. */ msleep(1); goto retry; } /** * scsi_target_reap - check to see if target is in use and destroy if not * @starget: target to be checked * * This is used after removing a LUN or doing a last put of the target * it checks atomically that nothing is using the target and removes * it if so. 
*/ void scsi_target_reap(struct scsi_target *starget) { /* * serious problem if this triggers: STARGET_DEL is only set in the if * the reap_ref drops to zero, so we're trying to do another final put * on an already released kref */ BUG_ON(starget->state == STARGET_DEL); scsi_target_reap_ref_put(starget); } /** * scsi_sanitize_inquiry_string - remove non-graphical chars from an * INQUIRY result string * @s: INQUIRY result string to sanitize * @len: length of the string * * Description: * The SCSI spec says that INQUIRY vendor, product, and revision * strings must consist entirely of graphic ASCII characters, * padded on the right with spaces. Since not all devices obey * this rule, we will replace non-graphic or non-ASCII characters * with spaces. Exception: a NUL character is interpreted as a * string terminator, so all the following characters are set to * spaces. **/ void scsi_sanitize_inquiry_string(unsigned char *s, int len) { int terminated = 0; for (; len > 0; (--len, ++s)) { if (*s == 0) terminated = 1; if (terminated || *s < 0x20 || *s > 0x7e) *s = ' '; } } EXPORT_SYMBOL(scsi_sanitize_inquiry_string); /** * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY * @sdev: scsi_device to probe * @inq_result: area to store the INQUIRY result * @result_len: len of inq_result * @bflags: store any bflags found here * * Description: * Probe the lun associated with @req using a standard SCSI INQUIRY; * * If the INQUIRY is successful, zero is returned and the * INQUIRY data is in @inq_result; the scsi_level and INQUIRY length * are copied to the scsi_device any flags value is stored in *@bflags. **/ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, int result_len, blist_flags_t *bflags) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; int first_inquiry_len, try_inquiry_len, next_inquiry_len; int response_len = 0; int pass, count, result, resid; struct scsi_failure failure_defs[] = { /* * not-ready to ready transition [asc/ascq=0x28/0x0] or * power-on, reset [asc/ascq=0x29/0x0], continue. INQUIRY * should not yield UNIT_ATTENTION but many buggy devices do * so anyway. */ { .sense = UNIT_ATTENTION, .asc = 0x28, .result = SAM_STAT_CHECK_CONDITION, }, { .sense = UNIT_ATTENTION, .asc = 0x29, .result = SAM_STAT_CHECK_CONDITION, }, { .allowed = 1, .result = DID_TIME_OUT << 16, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .resid = &resid, .failures = &failures, }; *bflags = 0; /* Perform up to 3 passes. The first pass uses a conservative * transfer length of 36 unless sdev->inquiry_len specifies a * different value. */ first_inquiry_len = sdev->inquiry_len ? sdev->inquiry_len : 36; try_inquiry_len = first_inquiry_len; pass = 1; next_pass: SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: INQUIRY pass %d length %d\n", pass, try_inquiry_len)); /* Each pass gets up to three chances to ignore Unit Attention */ scsi_failures_reset_retries(&failures); for (count = 0; count < 3; ++count) { memset(scsi_cmd, 0, 6); scsi_cmd[0] = INQUIRY; scsi_cmd[4] = (unsigned char) try_inquiry_len; memset(inq_result, 0, try_inquiry_len); result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, inq_result, try_inquiry_len, HZ / 2 + HZ * scsi_inq_timeout, 3, &exec_args); SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: INQUIRY %s with code 0x%x\n", result ? "failed" : "successful", result)); if (result == 0) { /* * if nothing was transferred, we try * again. 
It's a workaround for some USB * devices. */ if (resid == try_inquiry_len) continue; } break; } if (result == 0) { scsi_sanitize_inquiry_string(&inq_result[8], 8); scsi_sanitize_inquiry_string(&inq_result[16], 16); scsi_sanitize_inquiry_string(&inq_result[32], 4); response_len = inq_result[4] + 5; if (response_len > 255) response_len = first_inquiry_len; /* sanity */ /* * Get any flags for this device. * * XXX add a bflags to scsi_device, and replace the * corresponding bit fields in scsi_device, so bflags * need not be passed as an argument. */ *bflags = scsi_get_device_flags(sdev, &inq_result[8], &inq_result[16]); /* When the first pass succeeds we gain information about * what larger transfer lengths might work. */ if (pass == 1) { if (BLIST_INQUIRY_36 & *bflags) next_inquiry_len = 36; /* * LLD specified a maximum sdev->inquiry_len * but device claims it has more data. Capping * the length only makes sense for legacy * devices. If a device supports SPC-4 (2014) * or newer, assume that it is safe to ask for * as much as the device says it supports. */ else if (sdev->inquiry_len && response_len > sdev->inquiry_len && (inq_result[2] & 0x7) < 6) /* SPC-4 */ next_inquiry_len = sdev->inquiry_len; else next_inquiry_len = response_len; /* If more data is available perform the second pass */ if (next_inquiry_len > try_inquiry_len) { try_inquiry_len = next_inquiry_len; pass = 2; goto next_pass; } } } else if (pass == 2) { sdev_printk(KERN_INFO, sdev, "scsi scan: %d byte inquiry failed. " "Consider BLIST_INQUIRY_36 for this device\n", try_inquiry_len); /* If this pass failed, the third pass goes back and transfers * the same amount as we successfully got in the first pass. */ try_inquiry_len = first_inquiry_len; pass = 3; goto next_pass; } /* If the last transfer attempt got an error, assume the * peripheral doesn't exist or is dead. */ if (result) return -EIO; /* Don't report any more data than the device says is valid */ sdev->inquiry_len = min(try_inquiry_len, response_len); /* * XXX Abort if the response length is less than 36? If less than * 32, the lookup of the device flags (above) could be invalid, * and it would be possible to take an incorrect action - we do * not want to hang because of a short INQUIRY. On the flip side, * if the device is spun down or becoming ready (and so it gives a * short INQUIRY), an abort here prevents any further use of the * device, including spin up. * * On the whole, the best approach seems to be to assume the first * 36 bytes are valid no matter what the device says. That's * better than copying < 36 bytes to the inquiry-result buffer * and displaying garbage for the Vendor, Product, or Revision * strings. */ if (sdev->inquiry_len < 36) { if (!sdev->host->short_inquiry) { shost_printk(KERN_INFO, sdev->host, "scsi scan: INQUIRY result too short (%d)," " using 36\n", sdev->inquiry_len); sdev->host->short_inquiry = 1; } sdev->inquiry_len = 36; } /* * Related to the above issue: * * XXX Devices (disk or all?) should be sent a TEST UNIT READY, * and if not ready, sent a START_STOP to start (maybe spin up) and * then send the INQUIRY again, since the INQUIRY can change after * a device is initialized. * * Ideally, start a device if explicitly asked to do so. This * assumes that a device is spun up on power on, spun down on * request, and then spun up on request. */ /* * The scanning code needs to know the scsi_level, even if no * device is attached at LUN 0 (SCSI_SCAN_TARGET_PRESENT) so * non-zero LUNs can be scanned. 
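 *
 * (As a worked example of the conversion below: an INQUIRY ANSI version
 * field of 5 (SPC-3) is bumped by one and stored as scsi_level 6, the
 * kernel's SCSI_SPC_3; a pre-SCSI-2 device with a non-CCS response format
 * keeps its raw value.)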
*/ sdev->scsi_level = inq_result[2] & 0x0f; if (sdev->scsi_level >= 2 || (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1)) sdev->scsi_level++; sdev->sdev_target->scsi_level = sdev->scsi_level; /* * If SCSI-2 or lower, and if the transport requires it, * store the LUN value in CDB[1]. */ sdev->lun_in_cdb = 0; if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN && !sdev->host->no_scsi2_lun_in_cdb) sdev->lun_in_cdb = 1; return 0; } /** * scsi_add_lun - allocate and fully initialze a scsi_device * @sdev: holds information to be stored in the new scsi_device * @inq_result: holds the result of a previous INQUIRY to the LUN * @bflags: black/white list flag * @async: 1 if this device is being scanned asynchronously * * Description: * Initialize the scsi_device @sdev. Optionally set fields based * on values in *@bflags. * * Return: * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, blist_flags_t *bflags, int async) { const struct scsi_host_template *hostt = sdev->host->hostt; struct queue_limits lim; int ret; /* * XXX do not save the inquiry, since it can change underneath us, * save just vendor/model/rev. * * Rather than save it and have an ioctl that retrieves the saved * value, have an ioctl that executes the same INQUIRY code used * in scsi_probe_lun, let user level programs doing INQUIRY * scanning run at their own risk, or supply a user level program * that can correctly scan. */ /* * Copy at least 36 bytes of INQUIRY data, so that we don't * dereference unallocated memory when accessing the Vendor, * Product, and Revision strings. Badly behaved devices may set * the INQUIRY Additional Length byte to a small value, indicating * these strings are invalid, but often they contain plausible data * nonetheless. It doesn't matter if the device sent < 36 bytes * total, since scsi_probe_lun() initializes inq_result with 0s. */ sdev->inquiry = kmemdup(inq_result, max_t(size_t, sdev->inquiry_len, 36), GFP_KERNEL); if (sdev->inquiry == NULL) return SCSI_SCAN_NO_RESPONSE; sdev->vendor = (char *) (sdev->inquiry + 8); sdev->model = (char *) (sdev->inquiry + 16); sdev->rev = (char *) (sdev->inquiry + 32); if (strncmp(sdev->vendor, "ATA ", 8) == 0) { /* * sata emulation layer device. This is a hack to work around * the SATL power management specifications which state that * when the SATL detects the device has gone into standby * mode, it shall respond with NOT READY. */ sdev->allow_restart = 1; } if (*bflags & BLIST_ISROM) { sdev->type = TYPE_ROM; sdev->removable = 1; } else { sdev->type = (inq_result[0] & 0x1f); sdev->removable = (inq_result[1] & 0x80) >> 7; /* * some devices may respond with wrong type for * well-known logical units. Force well-known type * to enumerate them correctly. 
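 *
 * (For instance, the REPORT LUNS well-known LUN, 0xc101 in the flattened
 * 64-bit LUN format used here, should identify itself with peripheral
 * device type 0x1e/TYPE_WLUN, but some firmware reports 0x00 instead.)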
*/ if (scsi_is_wlun(sdev->lun) && sdev->type != TYPE_WLUN) { sdev_printk(KERN_WARNING, sdev, "%s: correcting incorrect peripheral device type 0x%x for W-LUN 0x%16xhN\n", __func__, sdev->type, (unsigned int)sdev->lun); sdev->type = TYPE_WLUN; } } if (sdev->type == TYPE_RBC || sdev->type == TYPE_ROM) { /* RBC and MMC devices can return SCSI-3 compliance and yet * still not support REPORT LUNS, so make them act as * BLIST_NOREPORTLUN unless BLIST_REPORTLUN2 is * specifically set */ if ((*bflags & BLIST_REPORTLUN2) == 0) *bflags |= BLIST_NOREPORTLUN; } /* * For a peripheral qualifier (PQ) value of 1 (001b), the SCSI * spec says: The device server is capable of supporting the * specified peripheral device type on this logical unit. However, * the physical device is not currently connected to this logical * unit. * * The above is vague, as it implies that we could treat 001 and * 011 the same. Stay compatible with previous code, and create a * scsi_device for a PQ of 1 * * Don't set the device offline here; rather let the upper * level drivers eval the PQ to decide whether they should * attach. So remove ((inq_result[0] >> 5) & 7) == 1 check. */ sdev->inq_periph_qual = (inq_result[0] >> 5) & 7; sdev->lockable = sdev->removable; sdev->soft_reset = (inq_result[7] & 1) && ((inq_result[3] & 7) == 2); if (sdev->scsi_level >= SCSI_3 || (sdev->inquiry_len > 56 && inq_result[56] & 0x04)) sdev->ppr = 1; if (inq_result[7] & 0x60) sdev->wdtr = 1; if (inq_result[7] & 0x10) sdev->sdtr = 1; sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d " "ANSI: %d%s\n", scsi_device_type(sdev->type), sdev->vendor, sdev->model, sdev->rev, sdev->inq_periph_qual, inq_result[2] & 0x07, (inq_result[3] & 0x0f) == 1 ? " CCS" : ""); if ((sdev->scsi_level >= SCSI_2) && (inq_result[7] & 2) && !(*bflags & BLIST_NOTQ)) { sdev->tagged_supported = 1; sdev->simple_tags = 1; } /* * Some devices (Texel CD ROM drives) have handshaking problems * when used with the Seagate controllers. borken is initialized * to 1, and then set it to 0 here. */ if ((*bflags & BLIST_BORKEN) == 0) sdev->borken = 0; if (*bflags & BLIST_NO_ULD_ATTACH) sdev->no_uld_attach = 1; /* * Apparently some really broken devices (contrary to the SCSI * standards) need to be selected without asserting ATN */ if (*bflags & BLIST_SELECT_NO_ATN) sdev->select_no_atn = 1; /* * Some devices may not want to have a start command automatically * issued when a device is added. 
*/ if (*bflags & BLIST_NOSTARTONADD) sdev->no_start_on_add = 1; if (*bflags & BLIST_SINGLELUN) scsi_target(sdev)->single_lun = 1; sdev->use_10_for_rw = 1; /* some devices don't like REPORT SUPPORTED OPERATION CODES * and will simply timeout causing sd_mod init to take a very * very long time */ if (*bflags & BLIST_NO_RSOC) sdev->no_report_opcodes = 1; /* set the device running here so that slave configure * may do I/O */ mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, SDEV_RUNNING); if (ret) ret = scsi_device_set_state(sdev, SDEV_BLOCK); mutex_unlock(&sdev->state_mutex); if (ret) { sdev_printk(KERN_ERR, sdev, "in wrong state %s to complete scan\n", scsi_device_state_name(sdev->sdev_state)); return SCSI_SCAN_NO_RESPONSE; } if (*bflags & BLIST_NOT_LOCKABLE) sdev->lockable = 0; if (*bflags & BLIST_RETRY_HWERROR) sdev->retry_hwerror = 1; if (*bflags & BLIST_NO_DIF) sdev->no_dif = 1; if (*bflags & BLIST_UNMAP_LIMIT_WS) sdev->unmap_limit_for_ws = 1; if (*bflags & BLIST_IGN_MEDIA_CHANGE) sdev->ignore_media_change = 1; sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) sdev->try_vpd_pages = 1; else if (*bflags & BLIST_SKIP_VPD_PAGES) sdev->skip_vpd_pages = 1; if (*bflags & BLIST_NO_VPD_SIZE) sdev->no_vpd_size = 1; transport_configure_device(&sdev->sdev_gendev); /* * No need to freeze the queue as it isn't reachable to anyone else yet. */ lim = queue_limits_start_update(sdev->request_queue); if (*bflags & BLIST_MAX_512) lim.max_hw_sectors = 512; else if (*bflags & BLIST_MAX_1024) lim.max_hw_sectors = 1024; if (hostt->device_configure) ret = hostt->device_configure(sdev, &lim); else if (hostt->slave_configure) ret = hostt->slave_configure(sdev); if (ret) { queue_limits_cancel_update(sdev->request_queue); /* * If the LLDD reports device not present, don't clutter the * console with failure messages. */ if (ret != -ENXIO) sdev_printk(KERN_ERR, sdev, "failed to configure device\n"); return SCSI_SCAN_NO_RESPONSE; } ret = queue_limits_commit_update(sdev->request_queue, &lim); if (ret) { sdev_printk(KERN_ERR, sdev, "failed to apply queue limits.\n"); return SCSI_SCAN_NO_RESPONSE; } /* * The queue_depth is often changed in ->device_configure. * * Set up budget map again since memory consumption of the map depends * on actual queue depth. */ if (hostt->device_configure || hostt->slave_configure) scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth); if (sdev->scsi_level >= SCSI_3) scsi_attach_vpd(sdev); scsi_cdl_check(sdev); sdev->max_queue_depth = sdev->queue_depth; WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map.depth); sdev->sdev_bflags = *bflags; /* * Ok, the device is now all set up, we can * register it and tell the rest of the kernel * about it. 
*/ if (!async && scsi_sysfs_add_sdev(sdev) != 0) return SCSI_SCAN_NO_RESPONSE; return SCSI_SCAN_LUN_PRESENT; } #ifdef CONFIG_SCSI_LOGGING /** * scsi_inq_str - print INQUIRY data from min to max index, strip trailing whitespace * @buf: Output buffer with at least end-first+1 bytes of space * @inq: Inquiry buffer (input) * @first: Offset of string into inq * @end: Index after last character in inq */ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq, unsigned first, unsigned end) { unsigned term = 0, idx; for (idx = 0; idx + first < end && idx + first < inq[4] + 5; idx++) { if (inq[idx+first] > ' ') { buf[idx] = inq[idx+first]; term = idx+1; } else { buf[idx] = ' '; } } buf[term] = 0; return buf; } #endif /** * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it * @starget: pointer to target device structure * @lun: LUN of target device * @bflagsp: store bflags here if not NULL * @sdevp: probe the LUN corresponding to this scsi_device * @rescan: if not equal to SCSI_SCAN_INITIAL skip some code only * needed on first scan * @hostdata: passed to scsi_alloc_sdev() * * Description: * Call scsi_probe_lun, if a LUN with an attached device is found, * allocate and set it up by calling scsi_add_lun. * * Return: * * - SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * - SCSI_SCAN_TARGET_PRESENT: target responded, but no device is * attached at the LUN * - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ static int scsi_probe_and_add_lun(struct scsi_target *starget, u64 lun, blist_flags_t *bflagsp, struct scsi_device **sdevp, enum scsi_scan_mode rescan, void *hostdata) { struct scsi_device *sdev; unsigned char *result; blist_flags_t bflags; int res = SCSI_SCAN_NO_RESPONSE, result_len = 256; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); /* * The rescan flag is used as an optimization, the first scan of a * host adapter calls into here with rescan == 0. */ sdev = scsi_device_lookup_by_target(starget, lun); if (sdev) { if (rescan != SCSI_SCAN_INITIAL || !scsi_device_created(sdev)) { SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: device exists on %s\n", dev_name(&sdev->sdev_gendev))); if (sdevp) *sdevp = sdev; else scsi_device_put(sdev); if (bflagsp) *bflagsp = scsi_get_device_flags(sdev, sdev->vendor, sdev->model); return SCSI_SCAN_LUN_PRESENT; } scsi_device_put(sdev); } else sdev = scsi_alloc_sdev(starget, lun, hostdata); if (!sdev) goto out; result = kmalloc(result_len, GFP_KERNEL); if (!result) goto out_free_sdev; if (scsi_probe_lun(sdev, result, result_len, &bflags)) goto out_free_result; if (bflagsp) *bflagsp = bflags; /* * result contains valid SCSI INQUIRY data. */ if ((result[0] >> 5) == 3) { /* * For a Peripheral qualifier 3 (011b), the SCSI * spec says: The device server is not capable of * supporting a physical device on this logical * unit. * * For disks, this implies that there is no * logical disk configured at sdev->lun, but there * is a target id responding. */ SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:" " peripheral qualifier of 3, device not" " added\n")) if (lun == 0) { SCSI_LOG_SCAN_BUS(1, { unsigned char vend[9]; unsigned char mod[17]; sdev_printk(KERN_INFO, sdev, "scsi scan: consider passing scsi_mod." 
"dev_flags=%s:%s:0x240 or 0x1000240\n", scsi_inq_str(vend, result, 8, 16), scsi_inq_str(mod, result, 16, 32)); }); } res = SCSI_SCAN_TARGET_PRESENT; goto out_free_result; } /* * Some targets may set slight variations of PQ and PDT to signal * that no LUN is present, so don't add sdev in these cases. * Two specific examples are: * 1) NetApp targets: return PQ=1, PDT=0x1f * 2) USB UFI: returns PDT=0x1f, with the PQ bits being "reserved" * in the UFI 1.0 spec (we cannot rely on reserved bits). * * References: * 1) SCSI SPC-3, pp. 145-146 * PQ=1: "A peripheral device having the specified peripheral * device type is not connected to this logical unit. However, the * device server is capable of supporting the specified peripheral * device type on this logical unit." * PDT=0x1f: "Unknown or no device type" * 2) USB UFI 1.0, p. 20 * PDT=00h Direct-access device (floppy) * PDT=1Fh none (no FDD connected to the requested logical unit) */ if (((result[0] >> 5) == 1 || starget->pdt_1f_for_no_lun) && (result[0] & 0x1f) == 0x1f && !scsi_is_wlun(lun)) { SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, "scsi scan: peripheral device type" " of 31, no device added\n")); res = SCSI_SCAN_TARGET_PRESENT; goto out_free_result; } res = scsi_add_lun(sdev, result, &bflags, shost->async_scan); if (res == SCSI_SCAN_LUN_PRESENT) { if (bflags & BLIST_KEY) { sdev->lockable = 0; scsi_unlock_floptical(sdev, result); } } out_free_result: kfree(result); out_free_sdev: if (res == SCSI_SCAN_LUN_PRESENT) { if (sdevp) { if (scsi_device_get(sdev) == 0) { *sdevp = sdev; } else { __scsi_remove_device(sdev); res = SCSI_SCAN_NO_RESPONSE; } } } else __scsi_remove_device(sdev); out: return res; } /** * scsi_sequential_lun_scan - sequentially scan a SCSI target * @starget: pointer to target structure to scan * @bflags: black/white list flag for LUN 0 * @scsi_level: Which version of the standard does this device adhere to * @rescan: passed to scsi_probe_add_lun() * * Description: * Generally, scan from LUN 1 (LUN 0 is assumed to already have been * scanned) to some maximum lun until a LUN is found with no device * attached. Use the bflags to figure out any oddities. * * Modifies sdevscan->lun. **/ static void scsi_sequential_lun_scan(struct scsi_target *starget, blist_flags_t bflags, int scsi_level, enum scsi_scan_mode rescan) { uint max_dev_lun; u64 sparse_lun, lun; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); SCSI_LOG_SCAN_BUS(3, starget_printk(KERN_INFO, starget, "scsi scan: Sequential scan\n")); max_dev_lun = min(max_scsi_luns, shost->max_lun); /* * If this device is known to support sparse multiple units, * override the other settings, and scan all of them. Normally, * SCSI-3 devices should be scanned via the REPORT LUNS. */ if (bflags & BLIST_SPARSELUN) { max_dev_lun = shost->max_lun; sparse_lun = 1; } else sparse_lun = 0; /* * If less than SCSI_1_CCS, and no special lun scanning, stop * scanning; this matches 2.4 behaviour, but could just be a bug * (to continue scanning a SCSI_1_CCS device). * * This test is broken. We might not have any device on lun0 for * a sparselun device, and if that's the case then how would we * know the real scsi_level, eh? It might make sense to just not * scan any SCSI_1 device for non-0 luns, but that check would best * go into scsi_alloc_sdev() and just have it return null when asked * to alloc an sdev for lun > 0 on an already found SCSI_1 device. 
* if ((sdevscan->scsi_level < SCSI_1_CCS) && ((bflags & (BLIST_FORCELUN | BLIST_SPARSELUN | BLIST_MAX5LUN)) == 0)) return; */ /* * If this device is known to support multiple units, override * the other settings, and scan all of them. */ if (bflags & BLIST_FORCELUN) max_dev_lun = shost->max_lun; /* * REGAL CDC-4X: avoid hang after LUN 4 */ if (bflags & BLIST_MAX5LUN) max_dev_lun = min(5U, max_dev_lun); /* * Do not scan SCSI-2 or lower device past LUN 7, unless * BLIST_LARGELUN. */ if (scsi_level < SCSI_3 && !(bflags & BLIST_LARGELUN)) max_dev_lun = min(8U, max_dev_lun); else max_dev_lun = min(256U, max_dev_lun); /* * We have already scanned LUN 0, so start at LUN 1. Keep scanning * until we reach the max, or no LUN is found and we are not * sparse_lun. */ for (lun = 1; lun < max_dev_lun; ++lun) if ((scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL) != SCSI_SCAN_LUN_PRESENT) && !sparse_lun) return; } /** * scsi_report_lun_scan - Scan using SCSI REPORT LUN results * @starget: which target * @bflags: Zero or a mix of BLIST_NOLUN, BLIST_REPORTLUN2, or BLIST_NOREPORTLUN * @rescan: nonzero if we can skip code only needed on first scan * * Description: * Fast scanning for modern (SCSI-3) devices by sending a REPORT LUN command. * Scan the resulting list of LUNs by calling scsi_probe_and_add_lun. * * If BLINK_REPORTLUN2 is set, scan a target that supports more than 8 * LUNs even if it's older than SCSI-3. * If BLIST_NOREPORTLUN is set, return 1 always. * If BLIST_NOLUN is set, return 0 always. * If starget->no_report_luns is set, return 1 always. * * Return: * 0: scan completed (or no memory, so further scanning is futile) * 1: could not scan with REPORT LUN **/ static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags, enum scsi_scan_mode rescan) { unsigned char scsi_cmd[MAX_COMMAND_SIZE]; unsigned int length; u64 lun; unsigned int num_luns; int result; struct scsi_lun *lunp, *lun_data; struct scsi_device *sdev; struct Scsi_Host *shost = dev_to_shost(&starget->dev); struct scsi_failure failure_defs[] = { { .sense = UNIT_ATTENTION, .asc = SCMD_FAILURE_ASC_ANY, .ascq = SCMD_FAILURE_ASCQ_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Fail all CCs except the UA above */ { .sense = SCMD_FAILURE_SENSE_ANY, .result = SAM_STAT_CHECK_CONDITION, }, /* Retry any other errors not listed above */ { .result = SCMD_FAILURE_RESULT_ANY, }, {} }; struct scsi_failures failures = { .total_allowed = 3, .failure_definitions = failure_defs, }; const struct scsi_exec_args exec_args = { .failures = &failures, }; int ret = 0; /* * Only support SCSI-3 and up devices if BLIST_NOREPORTLUN is not set. * Also allow SCSI-2 if BLIST_REPORTLUN2 is set and host adapter does * support more than 8 LUNs. * Don't attempt if the target doesn't support REPORT LUNS. */ if (bflags & BLIST_NOREPORTLUN) return 1; if (starget->scsi_level < SCSI_2 && starget->scsi_level != SCSI_UNKNOWN) return 1; if (starget->scsi_level < SCSI_3 && (!(bflags & BLIST_REPORTLUN2) || shost->max_lun <= 8)) return 1; if (bflags & BLIST_NOLUN) return 0; if (starget->no_report_luns) return 1; if (!(sdev = scsi_device_lookup_by_target(starget, 0))) { sdev = scsi_alloc_sdev(starget, 0, NULL); if (!sdev) return 0; if (scsi_device_get(sdev)) { __scsi_remove_device(sdev); return 0; } } /* * Allocate enough to hold the header (the same size as one scsi_lun) * plus the number of luns we are requesting. 511 was the default * value of the now removed max_report_luns parameter. 
*/ length = (511 + 1) * sizeof(struct scsi_lun); retry: lun_data = kmalloc(length, GFP_KERNEL); if (!lun_data) { printk(ALLOC_FAILURE_MSG, __func__); goto out; } scsi_cmd[0] = REPORT_LUNS; /* * bytes 1 - 5: reserved, set to zero. */ memset(&scsi_cmd[1], 0, 5); /* * bytes 6 - 9: length of the command. */ put_unaligned_be32(length, &scsi_cmd[6]); scsi_cmd[10] = 0; /* reserved */ scsi_cmd[11] = 0; /* control */ /* * We can get a UNIT ATTENTION, for example a power on/reset, so * retry a few times (like sd.c does for TEST UNIT READY). * Experience shows some combinations of adapter/devices get at * least two power on/resets. * * Illegal requests (for devices that do not support REPORT LUNS) * should come through as a check condition, and will not generate * a retry. */ scsi_failures_reset_retries(&failures); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: Sending REPORT LUNS\n")); result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, lun_data, length, SCSI_REPORT_LUNS_TIMEOUT, 3, &exec_args); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: REPORT LUNS %s result 0x%x\n", result ? "failed" : "successful", result)); if (result) { /* * The device probably does not support a REPORT LUN command */ ret = 1; goto out_err; } /* * Get the length from the first four bytes of lun_data. */ if (get_unaligned_be32(lun_data->scsi_lun) + sizeof(struct scsi_lun) > length) { length = get_unaligned_be32(lun_data->scsi_lun) + sizeof(struct scsi_lun); kfree(lun_data); goto retry; } length = get_unaligned_be32(lun_data->scsi_lun); num_luns = (length / sizeof(struct scsi_lun)); SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev, "scsi scan: REPORT LUN scan\n")); /* * Scan the luns in lun_data. The entry at offset 0 is really * the header, so start at 1 and go up to and including num_luns. */ for (lunp = &lun_data[1]; lunp <= &lun_data[num_luns]; lunp++) { lun = scsilun_to_int(lunp); if (lun > sdev->host->max_lun) { sdev_printk(KERN_WARNING, sdev, "lun%llu has a LUN larger than" " allowed by the host adapter\n", lun); } else { int res; res = scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL); if (res == SCSI_SCAN_NO_RESPONSE) { /* * Got some results, but now none, abort. */ sdev_printk(KERN_ERR, sdev, "Unexpected response" " from lun %llu while scanning, scan" " aborted\n", (unsigned long long)lun); break; } } } out_err: kfree(lun_data); out: if (scsi_device_created(sdev)) /* * the sdev we used didn't appear in the report luns scan */ __scsi_remove_device(sdev); scsi_device_put(sdev); return ret; } struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, uint id, u64 lun, void *hostdata) { struct scsi_device *sdev = ERR_PTR(-ENODEV); struct device *parent = &shost->shost_gendev; struct scsi_target *starget; if (strncmp(scsi_scan_type, "none", 4) == 0) return ERR_PTR(-ENODEV); starget = scsi_alloc_target(parent, channel, id); if (!starget) return ERR_PTR(-ENOMEM); scsi_autopm_get_target(starget); mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { scsi_probe_and_add_lun(starget, lun, NULL, &sdev, SCSI_SCAN_RESCAN, hostdata); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); scsi_autopm_put_target(starget); /* * paired with scsi_alloc_target(). 
Target will be destroyed unless * scsi_probe_and_add_lun made an underlying device visible */ scsi_target_reap(starget); put_device(&starget->dev); return sdev; } EXPORT_SYMBOL(__scsi_add_device); int scsi_add_device(struct Scsi_Host *host, uint channel, uint target, u64 lun) { struct scsi_device *sdev = __scsi_add_device(host, channel, target, lun, NULL); if (IS_ERR(sdev)) return PTR_ERR(sdev); scsi_device_put(sdev); return 0; } EXPORT_SYMBOL(scsi_add_device); int scsi_resume_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int ret = 0; device_lock(dev); /* * Bail out if the device or its queue are not running. Otherwise, * the rescan may block waiting for commands to be executed, with us * holding the device lock. This can result in a potential deadlock * in the power management core code when system resume is on-going. */ if (sdev->sdev_state != SDEV_RUNNING || blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } if (dev->driver && try_module_get(dev->driver->owner)) { struct scsi_driver *drv = to_scsi_driver(dev->driver); if (drv->resume) ret = drv->resume(dev); module_put(dev->driver->owner); } unlock: device_unlock(dev); return ret; } EXPORT_SYMBOL(scsi_resume_device); int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; int ret = 0; device_lock(dev); /* * Bail out if the device or its queue are not running. Otherwise, * the rescan may block waiting for commands to be executed, with us * holding the device lock. This can result in a potential deadlock * in the power management core code when system resume is on-going. */ if (sdev->sdev_state != SDEV_RUNNING || blk_queue_pm_only(sdev->request_queue)) { ret = -EWOULDBLOCK; goto unlock; } scsi_attach_vpd(sdev); scsi_cdl_check(sdev); if (sdev->handler && sdev->handler->rescan) sdev->handler->rescan(sdev); if (dev->driver && try_module_get(dev->driver->owner)) { struct scsi_driver *drv = to_scsi_driver(dev->driver); if (drv->rescan) drv->rescan(dev); module_put(dev->driver->owner); } unlock: device_unlock(dev); return ret; } EXPORT_SYMBOL(scsi_rescan_device); static void __scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); blist_flags_t bflags = 0; int res; struct scsi_target *starget; if (shost->this_id == id) /* * Don't scan the host adapter */ return; starget = scsi_alloc_target(parent, channel, id); if (!starget) return; scsi_autopm_get_target(starget); if (lun != SCAN_WILD_CARD) { /* * Scan for a specific host/chan/id/lun. */ scsi_probe_and_add_lun(starget, lun, NULL, NULL, rescan, NULL); goto out_reap; } /* * Scan LUN 0, if there is some response, scan further. Ideally, we * would not configure LUN 0 until all LUNs are scanned. */ res = scsi_probe_and_add_lun(starget, 0, &bflags, NULL, rescan, NULL); if (res == SCSI_SCAN_LUN_PRESENT || res == SCSI_SCAN_TARGET_PRESENT) { if (scsi_report_lun_scan(starget, bflags, rescan) != 0) /* * The REPORT LUN did not scan the target, * do a sequential scan. */ scsi_sequential_lun_scan(starget, bflags, starget->scsi_level, rescan); } out_reap: scsi_autopm_put_target(starget); /* * paired with scsi_alloc_target(): determine if the target has * any children at all and if not, nuke it */ scsi_target_reap(starget); put_device(&starget->dev); } /** * scsi_scan_target - scan a target id, possibly including all LUNs on the target. 
* @parent: host to scan * @channel: channel to scan * @id: target id to scan * @lun: Specific LUN to scan or SCAN_WILD_CARD * @rescan: passed to LUN scanning routines; SCSI_SCAN_INITIAL for * no rescan, SCSI_SCAN_RESCAN to rescan existing LUNs, * and SCSI_SCAN_MANUAL to force scanning even if * 'scan=manual' is set. * * Description: * Scan the target id on @parent, @channel, and @id. Scan at least LUN 0, * and possibly all LUNs on the target id. * * First try a REPORT LUN scan, if that does not scan the target, do a * sequential scan of LUNs on the target id. **/ void scsi_scan_target(struct device *parent, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { struct Scsi_Host *shost = dev_to_shost(parent); if (strncmp(scsi_scan_type, "none", 4) == 0) return; if (rescan != SCSI_SCAN_MANUAL && strncmp(scsi_scan_type, "manual", 6) == 0) return; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { __scsi_scan_target(parent, channel, id, lun, rescan); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); } EXPORT_SYMBOL(scsi_scan_target); static void scsi_scan_channel(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { uint order_id; if (id == SCAN_WILD_CARD) for (id = 0; id < shost->max_id; ++id) { /* * XXX adapter drivers when possible (FCP, iSCSI) * could modify max_id to match the current max, * not the absolute max. * * XXX add a shost id iterator, so for example, * the FC ID can be the same as a target id * without a huge overhead of sparse id's. */ if (shost->reverse_ordering) /* * Scan from high to low id. */ order_id = shost->max_id - id - 1; else order_id = id; __scsi_scan_target(&shost->shost_gendev, channel, order_id, lun, rescan); } else __scsi_scan_target(&shost->shost_gendev, channel, id, lun, rescan); } int scsi_scan_host_selected(struct Scsi_Host *shost, unsigned int channel, unsigned int id, u64 lun, enum scsi_scan_mode rescan) { SCSI_LOG_SCAN_BUS(3, shost_printk (KERN_INFO, shost, "%s: <%u:%u:%llu>\n", __func__, channel, id, lun)); if (((channel != SCAN_WILD_CARD) && (channel > shost->max_channel)) || ((id != SCAN_WILD_CARD) && (id >= shost->max_id)) || ((lun != SCAN_WILD_CARD) && (lun >= shost->max_lun))) return -EINVAL; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) scsi_complete_async_scans(); if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) { if (channel == SCAN_WILD_CARD) for (channel = 0; channel <= shost->max_channel; channel++) scsi_scan_channel(shost, channel, id, lun, rescan); else scsi_scan_channel(shost, channel, id, lun, rescan); scsi_autopm_put_host(shost); } mutex_unlock(&shost->scan_mutex); return 0; } static void scsi_sysfs_add_devices(struct Scsi_Host *shost) { struct scsi_device *sdev; shost_for_each_device(sdev, shost) { /* target removed before the device could be added */ if (sdev->sdev_state == SDEV_DEL) continue; /* If device is already visible, skip adding it to sysfs */ if (sdev->is_visible) continue; if (!scsi_host_scan_allowed(shost) || scsi_sysfs_add_sdev(sdev) != 0) __scsi_remove_device(sdev); } } /** * scsi_prep_async_scan - prepare for an async scan * @shost: the host which will be scanned * Returns: a cookie to be passed to scsi_finish_async_scan() * * Tells the midlayer this host is going to do an asynchronous scan. 
* It reserves the host's position in the scanning list and ensures * that other asynchronous scans started after this one won't affect the * ordering of the discovered devices. */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) goto err; data->shost = scsi_host_get(shost); if (!data->shost) goto err; init_completion(&data->prev_finished); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); spin_lock(&async_scan_lock); if (list_empty(&scanning_hosts)) complete(&data->prev_finished); list_add_tail(&data->list, &scanning_hosts); spin_unlock(&async_scan_lock); return data; err: mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } /** * scsi_finish_async_scan - asynchronous scan has finished * @data: cookie returned from earlier call to scsi_prep_async_scan() * * All the devices currently attached to this host have been found. * This function announces all the devices it has found to the rest * of the system. */ static void scsi_finish_async_scan(struct async_scan_data *data) { struct Scsi_Host *shost; unsigned long flags; if (!data) return; shost = data->shost; mutex_lock(&shost->scan_mutex); if (!shost->async_scan) { shost_printk(KERN_INFO, shost, "%s called twice\n", __func__); dump_stack(); mutex_unlock(&shost->scan_mutex); return; } wait_for_completion(&data->prev_finished); scsi_sysfs_add_devices(shost); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 0; spin_unlock_irqrestore(shost->host_lock, flags); mutex_unlock(&shost->scan_mutex); spin_lock(&async_scan_lock); list_del(&data->list); if (!list_empty(&scanning_hosts)) { struct async_scan_data *next = list_entry(scanning_hosts.next, struct async_scan_data, list); complete(&next->prev_finished); } spin_unlock(&async_scan_lock); scsi_autopm_put_host(shost); scsi_host_put(shost); kfree(data); } static void do_scsi_scan_host(struct Scsi_Host *shost) { if (shost->hostt->scan_finished) { unsigned long start = jiffies; if (shost->hostt->scan_start) shost->hostt->scan_start(shost); while (!shost->hostt->scan_finished(shost, jiffies - start)) msleep(10); } else { scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); } } static void do_scan_async(void *_data, async_cookie_t c) { struct async_scan_data *data = _data; struct Scsi_Host *shost = data->shost; do_scsi_scan_host(shost); scsi_finish_async_scan(data); } /** * scsi_scan_host - scan the given adapter * @shost: adapter to scan **/ void scsi_scan_host(struct Scsi_Host *shost) { struct async_scan_data *data; if (strncmp(scsi_scan_type, "none", 4) == 0 || strncmp(scsi_scan_type, "manual", 6) == 0) return; if (scsi_autopm_get_host(shost) < 0) return; data = scsi_prep_async_scan(shost); if (!data) { do_scsi_scan_host(shost); scsi_autopm_put_host(shost); return; } /* register with the async subsystem so wait_for_device_probe() * will flush this work */ async_schedule(do_scan_async, data); /* scsi_autopm_put_host(shost) is called in scsi_finish_async_scan() */ } EXPORT_SYMBOL(scsi_scan_host); void scsi_forget_host(struct Scsi_Host *shost) { struct scsi_device *sdev; unsigned long flags; restart: 
spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(sdev, &shost->__devices, siblings) { if (sdev->sdev_state == SDEV_DEL) continue; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_device(sdev); goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); }
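The scanning entry points exported above (scsi_scan_host(), scsi_scan_target(), scsi_add_device()) are what a low-level driver calls to reach the probe paths in this file. The fragment below is a minimal, hypothetical sketch of that usage; my_lld_probe, my_lld_adapter and my_lld_template are illustrative assumptions, not code from the file above.

#include <scsi/scsi_host.h>
#include <scsi/scsi_device.h>

/* Hypothetical LLD probe; error handling trimmed for brevity. */
static int my_lld_probe(struct device *dev, struct my_lld_adapter *adap)
{
	struct Scsi_Host *shost;

	shost = scsi_host_alloc(&my_lld_template, sizeof(*adap));
	if (!shost)
		return -ENOMEM;

	if (scsi_add_host(shost, dev)) {
		scsi_host_put(shost);
		return -ENODEV;
	}

	/* Kick off the (possibly asynchronous) scan of every channel/id/lun. */
	scsi_scan_host(shost);

	/* Later, if the transport reports one specific new device: */
	scsi_add_device(shost, 0 /* channel */, 1 /* id */, 0 /* lun */);

	return 0;
}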
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _DELAYED_CALL_H
#define _DELAYED_CALL_H

/*
 * Poor man's closures; I wish we could've done them sanely polymorphic,
 * but...
 */

struct delayed_call {
	void (*fn)(void *);
	void *arg;
};

#define DEFINE_DELAYED_CALL(name) struct delayed_call name = {NULL, NULL}

/* I really wish we had closures with sane typechecking... */
static inline void set_delayed_call(struct delayed_call *call,
		void (*fn)(void *), void *arg)
{
	call->fn = fn;
	call->arg = arg;
}

static inline void do_delayed_call(struct delayed_call *call)
{
	if (call->fn)
		call->fn(call->arg);
}

static inline void clear_delayed_call(struct delayed_call *call)
{
	call->fn = NULL;
}

#endif
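The header above implements a tiny callback pair: a callee returns a pointer together with a deferred cleanup, and the caller runs that cleanup once it is finished with the pointer (the VFS ->get_link() path is the main in-tree user). Below is a small illustrative sketch; produce_name(), release_name() and use_name() are made-up names, not part of the header.

#include <linux/delayed_call.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>

/* Cleanup handed to the caller via set_delayed_call(). */
static void release_name(void *arg)
{
	kfree(arg);
}

/* Callee: returns a buffer and registers how to free it later. */
static const char *produce_name(struct delayed_call *done)
{
	char *name = kstrdup("example", GFP_KERNEL);

	if (name)
		set_delayed_call(done, release_name, name);
	return name;	/* NULL on allocation failure */
}

/* Caller: uses the buffer, then triggers the deferred cleanup. */
static void use_name(void)
{
	DEFINE_DELAYED_CALL(done);
	const char *name = produce_name(&done);

	if (name)
		pr_info("got %s\n", name);
	do_delayed_call(&done);		/* no-op if nothing was registered */
}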
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the bitmap:port type */ #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/netlink.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <net/netlink.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> #include <linux/netfilter/ipset/ip_set_getport.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Counter support added */ /* 2 Comment support added */ #define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port /* Type structure */ struct bitmap_port { unsigned long *members; /* the set members */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[] /* data extensions */ __aligned(__alignof__(u64)); }; /* ADT structure for generic function args */ struct bitmap_port_adt_elem { u16 id; }; static u16 port_to_id(const struct bitmap_port *m, u16 port) { return port - m->first_port; } /* Common functions */ static int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static int bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_bit(e->id, map->members); } static int bitmap_port_do_del(const struct bitmap_port_adt_elem *e, struct bitmap_port *map) { return !test_and_clear_bit(e->id, map->members); } static int bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id, size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT,
htons(map->first_port + id)); } static int bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); } static bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) { bool ret; u8 proto; switch (pf) { case NFPROTO_IPV4: ret = ip_set_get_ip4_port(skb, src, port, &proto); break; case NFPROTO_IPV6: ret = ip_set_get_ip6_port(skb, src, port, &proto); break; default: return false; } if (!ret) return ret; switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: return true; default: return false; } } static int bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; if (!ip_set_get_ip_port(skb, opt->family, opt->flags & IPSET_DIM_ONE_SRC, &__port)) return -EINVAL; port = ntohs(__port); if (port < map->first_port || port > map->last_port) return -IPSET_ERR_BITMAP_RANGE; e.id = port_to_id(map, port); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = { .id = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); if (port < map->first_port || port > map->last_port) return -IPSET_ERR_BITMAP_RANGE; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (adt == IPSET_TEST) { e.id = port_to_id(map, port); return adtfn(set, &e, &ext, &ext, flags); } if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) { swap(port, port_to); if (port < map->first_port) return -IPSET_ERR_BITMAP_RANGE; } } else { port_to = port; } if (port_to > map->last_port) return -IPSET_ERR_BITMAP_RANGE; for (; port <= port_to; port++) { e.id = port_to_id(map, port); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } static bool bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) { const struct bitmap_port *x = a->data; const struct bitmap_port *y = b->data; return x->first_port == y->first_port && x->last_port == y->last_port && a->timeout == b->timeout && a->extensions == b->extensions; } /* Plain variant */ struct bitmap_port_elem { }; #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ static bool init_map_port(struct ip_set *set, struct bitmap_port *map, u16 first_port, u16 last_port) { map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_port = first_port; map->last_port = last_port; set->timeout = IPSET_NO_TIMEOUT; map->set = set; set->data = map; set->family = NFPROTO_UNSPEC; return true; } static int bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { struct 
bitmap_port *map; u16 first_port, last_port; u32 elements; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (first_port > last_port) swap(first_port, last_port); elements = last_port - first_port + 1; set->dsize = ip_set_elem_len(set, tb, 0, 0); map = ip_set_alloc(sizeof(*map) + elements * set->dsize); if (!map) return -ENOMEM; map->elements = elements; map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { ip_set_free(map); return -ENOMEM; } if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_port_gc_init(set, bitmap_port_gc); } return 0; } static struct ip_set_type bitmap_port_type = { .name = "bitmap:port", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init bitmap_port_init(void) { return ip_set_type_register(&bitmap_port_type); } static void __exit bitmap_port_fini(void) { rcu_barrier(); ip_set_type_unregister(&bitmap_port_type); } module_init(bitmap_port_init); module_exit(bitmap_port_fini);
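bitmap_port_create() above sizes the set straight from the configured range: elements = last_port - first_port + 1, one bit per port, rounded up to whole longs for memsize, while port_to_id() maps a port to its bit index by subtracting first_port. The standalone sketch below replays that arithmetic outside the kernel; BITS_TO_LONGS() is re-derived locally and the 1024..2047 range is only an example.

#include <stdio.h>

/* Userspace restatement of the kernel helper: longs needed to hold n bits. */
#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	unsigned int first_port = 1024, last_port = 2047;	/* example range */
	unsigned int elements = last_port - first_port + 1;	/* 1024 ports */
	size_t memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
	unsigned int port = 1080;
	unsigned int id = port - first_port;	/* port_to_id(): bit 56 */

	printf("elements=%u, memsize=%zu bytes, port %u -> bit %u\n",
	       elements, memsize, port, id);
	return 0;
}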
// SPDX-License-Identifier: GPL-2.0-or-later /* * PCBC: Propagating Cipher Block Chaining mode * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * Derived from cbc.c * - Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/algapi.h> #include <crypto/internal/cipher.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; do { crypto_xor(iv, src, bsize); crypto_cipher_encrypt_one(tfm, dst, iv); crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 * const iv = walk->iv; u8 tmpbuf[MAX_CIPHER_BLOCKSIZE]; do { memcpy(tmpbuf, src, bsize); crypto_xor(iv, src, bsize); crypto_cipher_encrypt_one(tfm, src, iv); crypto_xor_cpy(iv, tmpbuf, src, bsize); src += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { if (walk.src.virt.addr == walk.dst.virt.addr) nbytes = crypto_pcbc_encrypt_inplace(req, &walk, cipher); else nbytes = crypto_pcbc_encrypt_segment(req, &walk, cipher); err = skcipher_walk_done(&walk, nbytes); } return err; } static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *dst = walk->dst.virt.addr; u8 * const iv = walk->iv; do { crypto_cipher_decrypt_one(tfm, dst, src); crypto_xor(dst, iv, bsize); crypto_xor_cpy(iv, dst, src, bsize); src += bsize; dst += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, struct skcipher_walk *walk, struct crypto_cipher *tfm) { int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 * const iv = walk->iv; u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(u32)); do { memcpy(tmpbuf, src, bsize);
crypto_cipher_decrypt_one(tfm, src, src); crypto_xor(src, iv, bsize); crypto_xor_cpy(iv, src, tmpbuf, bsize); src += bsize; } while ((nbytes -= bsize) >= bsize); return nbytes; } static int crypto_pcbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cipher *cipher = skcipher_cipher_simple(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes) { if (walk.src.virt.addr == walk.dst.virt.addr) nbytes = crypto_pcbc_decrypt_inplace(req, &walk, cipher); else nbytes = crypto_pcbc_decrypt_segment(req, &walk, cipher); err = skcipher_walk_done(&walk, nbytes); } return err; } static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_instance *inst; int err; inst = skcipher_alloc_instance_simple(tmpl, tb); if (IS_ERR(inst)) return PTR_ERR(inst); inst->alg.encrypt = crypto_pcbc_encrypt; inst->alg.decrypt = crypto_pcbc_decrypt; err = skcipher_register_instance(tmpl, inst); if (err) inst->free(inst); return err; } static struct crypto_template crypto_pcbc_tmpl = { .name = "pcbc", .create = crypto_pcbc_create, .module = THIS_MODULE, }; static int __init crypto_pcbc_module_init(void) { return crypto_register_template(&crypto_pcbc_tmpl); } static void __exit crypto_pcbc_module_exit(void) { crypto_unregister_template(&crypto_pcbc_tmpl); } subsys_initcall(crypto_pcbc_module_init); module_exit(crypto_pcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PCBC block cipher mode of operation"); MODULE_ALIAS_CRYPTO("pcbc"); MODULE_IMPORT_NS(CRYPTO_INTERNAL);
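The step that makes the mode above "propagating" is the crypto_xor_cpy(iv, dst, src, bsize) call in each loop: the IV for the next block becomes plaintext XOR ciphertext of the current one. The standalone sketch below replays only that chaining with a toy byte-wise "cipher" (a fixed XOR mask standing in for crypto_cipher_encrypt_one(); it is not cryptography), so the IV update can be read in isolation.

#include <stdio.h>
#include <string.h>

#define BSIZE 8

/* Placeholder for the real block cipher: XOR with a constant mask. */
static void toy_encrypt(unsigned char *dst, const unsigned char *src)
{
	for (int i = 0; i < BSIZE; i++)
		dst[i] = src[i] ^ 0x5a;
}

int main(void)
{
	unsigned char iv[BSIZE] = { 0 };	/* zero IV, example only */
	unsigned char pt[2][BSIZE], ct[2][BSIZE];

	memcpy(pt[0], "ABCDEFGH", BSIZE);
	memcpy(pt[1], "IJKLMNOP", BSIZE);

	for (int blk = 0; blk < 2; blk++) {
		for (int i = 0; i < BSIZE; i++)
			iv[i] ^= pt[blk][i];		/* iv ^= plaintext */
		toy_encrypt(ct[blk], iv);		/* ciphertext = E(iv) */
		for (int i = 0; i < BSIZE; i++)
			iv[i] = pt[blk][i] ^ ct[blk][i];	/* next IV = P ^ C */
	}

	for (int blk = 0; blk < 2; blk++) {
		printf("block %d:", blk);
		for (int i = 0; i < BSIZE; i++)
			printf(" %02x", ct[blk][i]);
		printf("\n");
	}
	return 0;
}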
// SPDX-License-Identifier: GPL-2.0 /* * gendisk handling * * Portions Copyright (C) 2020 Christoph Hellwig */ #include <linux/module.h> #include <linux/ctype.h> #include <linux/fs.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/log2.h> #include <linux/pm_runtime.h> #include
<linux/badblocks.h> #include <linux/part_stat.h> #include <linux/blktrace_api.h> #include "blk-throttle.h" #include "blk.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" #include "blk-cgroup.h" static struct kobject *block_depr; /* * Unique, monotonically increasing sequential number associated with block * devices instances (i.e. incremented each time a device is attached). * Associating uevents with block devices in userspace is difficult and racy: * the uevent netlink socket is lossy, and on slow and overloaded systems has * a very high latency. * Block devices do not have exclusive owners in userspace, any process can set * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0 * can be reused again and again). * A userspace process setting up a block device and watching for its events * cannot thus reliably tell whether an event relates to the device it just set * up or another earlier instance with the same name. * This sequential number allows userspace processes to solve this problem, and * uniquely associate an uevent to the lifetime to a device. */ static atomic64_t diskseq; /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) static DEFINE_IDA(ext_devt_ida); void set_capacity(struct gendisk *disk, sector_t sectors) { bdev_set_nr_sectors(disk->part0, sectors); } EXPORT_SYMBOL(set_capacity); /* * Set disk capacity and notify if the size is not currently zero and will not * be set to zero. Returns true if a uevent was sent, otherwise false. */ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); /* * Only print a message and send a uevent if the gendisk is user visible * and alive. This avoids spamming the log and udev when setting the * initial capacity during probing. */ if (size == capacity || !disk_live(disk) || (disk->flags & GENHD_FL_HIDDEN)) return false; pr_info("%s: detected capacity change from %lld to %lld\n", disk->disk_name, capacity, size); /* * Historically we did not send a uevent for changes to/from an empty * device. */ if (!capacity || !size) return false; kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); return true; } EXPORT_SYMBOL_GPL(set_capacity_and_notify); static void part_stat_read_all(struct block_device *part, struct disk_stats *stat) { int cpu; memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { stat->nsecs[group] += ptr->nsecs[group]; stat->sectors[group] += ptr->sectors[group]; stat->ios[group] += ptr->ios[group]; stat->merges[group] += ptr->merges[group]; } stat->io_ticks += ptr->io_ticks; } } unsigned int part_in_flight(struct block_device *part) { unsigned int inflight = 0; int cpu; for_each_possible_cpu(cpu) { inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) + part_stat_local_read_cpu(part, in_flight[1], cpu); } if ((int)inflight < 0) inflight = 0; return inflight; } static void part_in_flight_rw(struct block_device *part, unsigned int inflight[2]) { int cpu; inflight[0] = 0; inflight[1] = 0; for_each_possible_cpu(cpu) { inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu); inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu); } if ((int)inflight[0] < 0) inflight[0] = 0; if ((int)inflight[1] < 0) inflight[1] = 0; } /* * Can be deleted altogether. Later. 
* */ #define BLKDEV_MAJOR_HASH_SIZE 255 static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD void (*probe)(dev_t devt); #endif } *major_names[BLKDEV_MAJOR_HASH_SIZE]; static DEFINE_MUTEX(major_names_lock); static DEFINE_SPINLOCK(major_names_spinlock); /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(unsigned major) { return major % BLKDEV_MAJOR_HASH_SIZE; } #ifdef CONFIG_PROC_FS void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; spin_lock(&major_names_spinlock); for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); spin_unlock(&major_names_spinlock); } #endif /* CONFIG_PROC_FS */ /** * __register_blkdev - register a new block device * * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string * @probe: pre-devtmpfs / pre-udev callback used to create disks when their * pre-created device node is accessed. When a probe call uses * add_disk() and it fails the driver must cleanup resources. This * interface may soon be removed. * * The @name must be unique within the system. * * The return value depends on the @major input parameter: * * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1] * then the function returns zero on success, or a negative error code * - if any unused major number was requested with @major = 0 parameter * then the return value is the allocated major number in range * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise * * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. * * Use register_blkdev instead for any new code. 
/**
 * __register_blkdev - register a new block device
 *
 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 *         @major = 0, try to allocate any unused major number.
 * @name: the name of the new block device as a zero terminated string
 * @probe: pre-devtmpfs / pre-udev callback used to create disks when their
 *	   pre-created device node is accessed. When a probe call uses
 *	   add_disk() and it fails the driver must clean up resources. This
 *	   interface may soon be removed.
 *
 * The @name must be unique within the system.
 *
 * The return value depends on the @major input parameter:
 *
 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 *    then the function returns zero on success, or a negative error code
 *  - if any unused major number was requested with the @major = 0 parameter
 *    then the return value is the allocated major number in range
 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 *
 * See Documentation/admin-guide/devices.txt for the list of allocated
 * major numbers.
 *
 * Use register_blkdev instead for any new code; an illustrative usage sketch
 * follows unregister_blkdev() below.
 */
int __register_blkdev(unsigned int major, const char *name,
		void (*probe)(dev_t devt))
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

	mutex_lock(&major_names_lock);

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
			printk("%s: failed to get major for %s\n",
			       __func__, name);
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

	if (major >= BLKDEV_MAJOR_MAX) {
		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
		       __func__, major, BLKDEV_MAJOR_MAX-1, name);

		ret = -EINVAL;
		goto out;
	}

	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	p->probe = probe;
#endif
	strscpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	spin_lock(&major_names_spinlock);
	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;
	spin_unlock(&major_names_spinlock);

	if (ret < 0) {
		printk("register_blkdev: cannot get major %u for %s\n",
		       major, name);
		kfree(p);
	}
out:
	mutex_unlock(&major_names_lock);
	return ret;
}
EXPORT_SYMBOL(__register_blkdev);

void unregister_blkdev(unsigned int major, const char *name)
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

	mutex_lock(&major_names_lock);
	spin_lock(&major_names_spinlock);
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
		p = *n;
		*n = p->next;
	}
	spin_unlock(&major_names_spinlock);
	mutex_unlock(&major_names_lock);
	kfree(p);
}
EXPORT_SYMBOL(unregister_blkdev);
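/*
 * Illustrative sketch (not part of genhd.c): typical driver-side usage of
 * the registration API documented above.  Passing 0 asks for any unused
 * major; the allocated value is the (positive) return.  "exampleblk" and
 * the example_* symbols are hypothetical, hence the #if 0 guard.
 */
#if 0
static int example_major;

static int __init example_init(void)
{
	int ret = register_blkdev(0, "exampleblk");

	if (ret < 0)
		return ret;
	example_major = ret;	/* dynamically allocated major */
	return 0;
}

static void __exit example_exit(void)
{
	/* must match the major and name passed at registration time */
	unregister_blkdev(example_major, "exampleblk");
}

module_init(example_init);
module_exit(example_exit);
#endif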
int blk_alloc_ext_minor(void)
{
	int idx;

	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
	if (idx == -ENOSPC)
		return -EBUSY;
	return idx;
}

void blk_free_ext_minor(unsigned int minor)
{
	ida_free(&ext_devt_ida, minor);
}

void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
	struct block_device *part;
	unsigned long idx;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		if (bdev_is_partition(part) && !bdev_nr_sectors(part))
			continue;
		if (!kobject_get_unless_zero(&part->bd_device.kobj))
			continue;

		rcu_read_unlock();
		kobject_uevent(bdev_kobj(part), action);
		put_device(&part->bd_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(disk_uevent);

int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
{
	struct file *file;
	int ret = 0;

	if (!disk_has_partscan(disk))
		return -EINVAL;
	if (disk->open_partitions)
		return -EBUSY;

	/*
	 * If the device is already opened exclusively by the current thread,
	 * it is safe to scan partitions.  Otherwise, use
	 * bd_prepare_to_claim() to synchronize with other exclusive openers
	 * and other partition scanners.
	 */
	if (!(mode & BLK_OPEN_EXCL)) {
		ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions,
					  NULL);
		if (ret)
			return ret;
	}

	set_bit(GD_NEED_PART_SCAN, &disk->state);
	file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL,
				     NULL, NULL);
	if (IS_ERR(file))
		ret = PTR_ERR(file);
	else
		fput(file);

	/*
	 * If bdev_file_open_by_dev() failed early, GD_NEED_PART_SCAN would
	 * still be set; clear it so that re-assembling a partitioned RAID
	 * device does not wrongly create partitions for the underlying disk.
	 */
	clear_bit(GD_NEED_PART_SCAN, &disk->state);
	if (!(mode & BLK_OPEN_EXCL))
		bd_abort_claiming(disk->part0, disk_scan_partitions);
	return ret;
}

/**
 * device_add_disk - add disk information to kernel list
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
 */
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
				 const struct attribute_group **groups)
{
	struct device *ddev = disk_to_dev(disk);
	int ret;

	/* Only makes sense for bio-based drivers to set ->poll_bio */
	if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
		return -EINVAL;

	/*
	 * The disk queue should now be all set with enough information about
	 * the device for the elevator code to pick an adequate default
	 * elevator if one is needed, that is, for devices requesting queue
	 * registration.
	 */
	elevator_init_mq(disk->queue);

	/* Mark bdev as having a submit_bio, if needed */
	if (disk->fops->submit_bio)
		bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);

	/*
	 * If the driver provides an explicit major number it must also
	 * provide the number of minor numbers supported, and those will be
	 * used to set up the gendisk.
	 * Otherwise just allocate the device numbers for both the whole
	 * device and all partitions from the extended dev_t space.
	 */
	ret = -EINVAL;
	if (disk->major) {
		if (WARN_ON(!disk->minors))
			goto out_exit_elevator;

		if (disk->minors > DISK_MAX_PARTS) {
			pr_err("block: can't allocate more than %d partitions\n",
			       DISK_MAX_PARTS);
			disk->minors = DISK_MAX_PARTS;
		}
		if (disk->first_minor > MINORMASK ||