/*
 * Copyright (c) 2016 Intel Corporation
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission. The copyright holders make no representations
 * about the suitability of this software for any purpose. It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#include <linux/uaccess.h>

#include <drm/drm_atomic.h>
#include <drm/drm_color_mgmt.h>
#include <drm/drm_crtc.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_print.h>

#include "drm_crtc_internal.h"

/**
 * DOC: overview
 *
 * Color management or color space adjustments are supported through a set of
 * 5 properties on the &drm_crtc object. They are set up by calling
 * drm_crtc_enable_color_mgmt().
 *
 * "DEGAMMA_LUT":
 *	Blob property to set the degamma lookup table (LUT) mapping pixel data
 *	from the framebuffer before it is given to the transformation matrix.
 *	The data is interpreted as an array of &struct drm_color_lut elements.
 *	Hardware might choose not to use the full precision of the LUT elements
 *	nor use all the elements of the LUT (for example the hardware might
 *	choose to interpolate between LUT[0] and LUT[4]).
 *
 *	Setting this to NULL (blob property value set to 0) means a
 *	linear/pass-thru gamma table should be used. This is generally the
 *	driver boot-up state too. Drivers can access this blob through
 *	&drm_crtc_state.degamma_lut.
 *
 * "DEGAMMA_LUT_SIZE":
 *	Unsigned range property to give the size of the lookup table to be set
 *	on the DEGAMMA_LUT property (the size depends on the underlying
 *	hardware). If drivers support multiple LUT sizes then they should
 *	publish the largest size, and sub-sample smaller sized LUTs (e.g. for
 *	split-gamma modes) appropriately.
 *
 * "CTM":
 *	Blob property to set the current transformation matrix (CTM) applied to
 *	pixel data after the lookup through the degamma LUT and before the
 *	lookup through the gamma LUT. The data is interpreted as a struct
 *	&drm_color_ctm.
 *
 *	Setting this to NULL (blob property value set to 0) means a
 *	unit/pass-thru matrix should be used. This is generally the driver
 *	boot-up state too. Drivers can access the blob for the color conversion
 *	matrix through &drm_crtc_state.ctm.
 *
 * "GAMMA_LUT":
 *	Blob property to set the gamma lookup table (LUT) mapping pixel data
 *	after the transformation matrix to data sent to the connector. The
 *	data is interpreted as an array of &struct drm_color_lut elements.
 *	Hardware might choose not to use the full precision of the LUT elements
 *	nor use all the elements of the LUT (for example the hardware might
 *	choose to interpolate between LUT[0] and LUT[4]).
 *
 *	Setting this to NULL (blob property value set to 0) means a
 *	linear/pass-thru gamma table should be used. This is generally the
 *	driver boot-up state too. Drivers can access this blob through
 *	&drm_crtc_state.gamma_lut.
 *
 *	Note that for mostly historical reasons stemming from Xorg heritage,
 *	this is also used to store the color map (also sometimes color lut,
 *	CLUT or color palette) for indexed formats like DRM_FORMAT_C8.
 *
 * "GAMMA_LUT_SIZE":
 *	Unsigned range property to give the size of the lookup table to be set
 *	on the GAMMA_LUT property (the size depends on the underlying hardware).
 *	If drivers support multiple LUT sizes then they should publish the
 *	largest size, and sub-sample smaller sized LUTs (e.g.
for split-gamma * modes) appropriately. * * There is also support for a legacy gamma table, which is set up by calling * drm_mode_crtc_set_gamma_size(). The DRM core will then alias the legacy gamma * ramp with "GAMMA_LUT" or, if that is unavailable, "DEGAMMA_LUT". * * Support for different non RGB color encodings is controlled through * &drm_plane specific COLOR_ENCODING and COLOR_RANGE properties. They * are set up by calling drm_plane_create_color_properties(). * * "COLOR_ENCODING": * Optional plane enum property to support different non RGB * color encodings. The driver can provide a subset of standard * enum values supported by the DRM plane. * * "COLOR_RANGE": * Optional plane enum property to support different non RGB * color parameter ranges. The driver can provide a subset of * standard enum values supported by the DRM plane. */ /** * drm_color_ctm_s31_32_to_qm_n * * @user_input: input value * @m: number of integer bits, only support m <= 32, include the sign-bit * @n: number of fractional bits, only support n <= 32 * * Convert and clamp S31.32 sign-magnitude to Qm.n (signed 2's complement). * The sign-bit BIT(m+n-1) and above are 0 for positive value and 1 for negative * the range of value is [-2^(m-1), 2^(m-1) - 2^-n] * * For example * A Q3.12 format number: * - required bit: 3 + 12 = 15bits * - range: [-2^2, 2^2 - 2^−15] * * NOTE: the m can be zero if all bit_precision are used to present fractional * bits like Q0.32 */ u64 drm_color_ctm_s31_32_to_qm_n(u64 user_input, u32 m, u32 n) { u64 mag = (user_input & ~BIT_ULL(63)) >> (32 - n); bool negative = !!(user_input & BIT_ULL(63)); s64 val; WARN_ON(m > 32 || n > 32); val = clamp_val(mag, 0, negative ? BIT_ULL(n + m - 1) : BIT_ULL(n + m - 1) - 1); return negative ? -val : val; } EXPORT_SYMBOL(drm_color_ctm_s31_32_to_qm_n); /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC * @degamma_lut_size: the size of the degamma lut (before CSC) * @has_ctm: whether to attach ctm_property for CSC matrix * @gamma_lut_size: the size of the gamma lut (after CSC) * * This function lets the driver enable the color correction * properties on a CRTC. This includes 3 degamma, csc and gamma * properties that userspace can set and 2 size properties to inform * the userspace of the lut sizes. Each of the properties are * optional. The gamma and degamma properties are only attached if * their size is not 0 and ctm_property is only attached if has_ctm is * true. */ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, uint gamma_lut_size) { struct drm_device *dev = crtc->dev; struct drm_mode_config *config = &dev->mode_config; if (degamma_lut_size) { drm_object_attach_property(&crtc->base, config->degamma_lut_property, 0); drm_object_attach_property(&crtc->base, config->degamma_lut_size_property, degamma_lut_size); } if (has_ctm) drm_object_attach_property(&crtc->base, config->ctm_property, 0); if (gamma_lut_size) { drm_object_attach_property(&crtc->base, config->gamma_lut_property, 0); drm_object_attach_property(&crtc->base, config->gamma_lut_size_property, gamma_lut_size); } } EXPORT_SYMBOL(drm_crtc_enable_color_mgmt); /** * drm_mode_crtc_set_gamma_size - set the gamma table size * @crtc: CRTC to set the gamma table size for * @gamma_size: size of the gamma table * * Drivers which support gamma tables should set this to the supported gamma * table size when initializing the CRTC. Currently the drm core only supports a * fixed gamma table size. 
* * Returns: * Zero on success, negative errno on failure. */ int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size) { uint16_t *r_base, *g_base, *b_base; int i; crtc->gamma_size = gamma_size; crtc->gamma_store = kcalloc(gamma_size, sizeof(uint16_t) * 3, GFP_KERNEL); if (!crtc->gamma_store) { crtc->gamma_size = 0; return -ENOMEM; } r_base = crtc->gamma_store; g_base = r_base + gamma_size; b_base = g_base + gamma_size; for (i = 0; i < gamma_size; i++) { r_base[i] = i << 8; g_base[i] = i << 8; b_base[i] = i << 8; } return 0; } EXPORT_SYMBOL(drm_mode_crtc_set_gamma_size); /** * drm_crtc_supports_legacy_gamma - does the crtc support legacy gamma correction table * @crtc: CRTC object * * Returns true/false if the given crtc supports setting the legacy gamma * correction table. */ static bool drm_crtc_supports_legacy_gamma(struct drm_crtc *crtc) { u32 gamma_id = crtc->dev->mode_config.gamma_lut_property->base.id; u32 degamma_id = crtc->dev->mode_config.degamma_lut_property->base.id; if (!crtc->gamma_size) return false; if (crtc->funcs->gamma_set) return true; return !!(drm_mode_obj_find_prop_id(&crtc->base, gamma_id) || drm_mode_obj_find_prop_id(&crtc->base, degamma_id)); } /** * drm_crtc_legacy_gamma_set - set the legacy gamma correction table * @crtc: CRTC object * @red: red correction table * @green: green correction table * @blue: blue correction table * @size: size of the tables * @ctx: lock acquire context * * Implements support for legacy gamma correction table for drivers * that have set drm_crtc_funcs.gamma_set or that support color management * through the DEGAMMA_LUT/GAMMA_LUT properties. See * drm_crtc_enable_color_mgmt() and the containing chapter for * how the atomic color management and gamma tables work. * * This function sets the gamma using drm_crtc_funcs.gamma_set if set, or * alternatively using crtc color management properties. */ static int drm_crtc_legacy_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, u32 size, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct drm_atomic_state *state; struct drm_crtc_state *crtc_state; struct drm_property_blob *blob; struct drm_color_lut *blob_data; u32 gamma_id = dev->mode_config.gamma_lut_property->base.id; u32 degamma_id = dev->mode_config.degamma_lut_property->base.id; bool use_gamma_lut; int i, ret = 0; bool replaced; if (crtc->funcs->gamma_set) return crtc->funcs->gamma_set(crtc, red, green, blue, size, ctx); if (drm_mode_obj_find_prop_id(&crtc->base, gamma_id)) use_gamma_lut = true; else if (drm_mode_obj_find_prop_id(&crtc->base, degamma_id)) use_gamma_lut = false; else return -ENODEV; state = drm_atomic_state_alloc(crtc->dev); if (!state) return -ENOMEM; blob = drm_property_create_blob(dev, sizeof(struct drm_color_lut) * size, NULL); if (IS_ERR(blob)) { ret = PTR_ERR(blob); blob = NULL; goto fail; } /* Prepare GAMMA_LUT with the legacy values. */ blob_data = blob->data; for (i = 0; i < size; i++) { blob_data[i].red = red[i]; blob_data[i].green = green[i]; blob_data[i].blue = blue[i]; } state->acquire_ctx = ctx; crtc_state = drm_atomic_get_crtc_state(state, crtc); if (IS_ERR(crtc_state)) { ret = PTR_ERR(crtc_state); goto fail; } /* Set GAMMA_LUT and reset DEGAMMA_LUT and CTM */ replaced = drm_property_replace_blob(&crtc_state->degamma_lut, use_gamma_lut ? NULL : blob); replaced |= drm_property_replace_blob(&crtc_state->ctm, NULL); replaced |= drm_property_replace_blob(&crtc_state->gamma_lut, use_gamma_lut ? 
blob : NULL); crtc_state->color_mgmt_changed |= replaced; ret = drm_atomic_commit(state); fail: drm_atomic_state_put(state); drm_property_blob_put(blob); return ret; } /** * drm_mode_gamma_set_ioctl - set the gamma table * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * Set the gamma table of a CRTC to the one passed in by the user. Userspace can * inquire the required gamma table size through drm_mode_gamma_get_ioctl. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; struct drm_modeset_acquire_ctx ctx; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, crtc_lut->crtc_id); if (!crtc) return -ENOENT; if (!drm_crtc_supports_legacy_gamma(crtc)) return -ENOSYS; /* memcpy into gamma store */ if (crtc_lut->gamma_size != crtc->gamma_size) return -EINVAL; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret); size = crtc_lut->gamma_size * (sizeof(uint16_t)); r_base = crtc->gamma_store; if (copy_from_user(r_base, (void __user *)(unsigned long)crtc_lut->red, size)) { ret = -EFAULT; goto out; } g_base = r_base + size; if (copy_from_user(g_base, (void __user *)(unsigned long)crtc_lut->green, size)) { ret = -EFAULT; goto out; } b_base = g_base + size; if (copy_from_user(b_base, (void __user *)(unsigned long)crtc_lut->blue, size)) { ret = -EFAULT; goto out; } ret = drm_crtc_legacy_gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size, &ctx); out: DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); return ret; } /** * drm_mode_gamma_get_ioctl - get the gamma table * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * Copy the current gamma table into the storage provided. This also provides * the gamma table size the driver expects, which can be used to size the * allocated storage. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. 
*/ int drm_mode_gamma_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_crtc_lut *crtc_lut = data; struct drm_crtc *crtc; void *r_base, *g_base, *b_base; int size; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; crtc = drm_crtc_find(dev, file_priv, crtc_lut->crtc_id); if (!crtc) return -ENOENT; /* memcpy into gamma store */ if (crtc_lut->gamma_size != crtc->gamma_size) return -EINVAL; drm_modeset_lock(&crtc->mutex, NULL); size = crtc_lut->gamma_size * (sizeof(uint16_t)); r_base = crtc->gamma_store; if (copy_to_user((void __user *)(unsigned long)crtc_lut->red, r_base, size)) { ret = -EFAULT; goto out; } g_base = r_base + size; if (copy_to_user((void __user *)(unsigned long)crtc_lut->green, g_base, size)) { ret = -EFAULT; goto out; } b_base = g_base + size; if (copy_to_user((void __user *)(unsigned long)crtc_lut->blue, b_base, size)) { ret = -EFAULT; goto out; } out: drm_modeset_unlock(&crtc->mutex); return ret; } static const char * const color_encoding_name[] = { [DRM_COLOR_YCBCR_BT601] = "ITU-R BT.601 YCbCr", [DRM_COLOR_YCBCR_BT709] = "ITU-R BT.709 YCbCr", [DRM_COLOR_YCBCR_BT2020] = "ITU-R BT.2020 YCbCr", }; static const char * const color_range_name[] = { [DRM_COLOR_YCBCR_FULL_RANGE] = "YCbCr full range", [DRM_COLOR_YCBCR_LIMITED_RANGE] = "YCbCr limited range", }; /** * drm_get_color_encoding_name - return a string for color encoding * @encoding: color encoding to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. */ const char *drm_get_color_encoding_name(enum drm_color_encoding encoding) { if (WARN_ON(encoding >= ARRAY_SIZE(color_encoding_name))) return "unknown"; return color_encoding_name[encoding]; } /** * drm_get_color_range_name - return a string for color range * @range: color range to compute name of * * In contrast to the other drm_get_*_name functions this one here returns a * const pointer and hence is threadsafe. */ const char *drm_get_color_range_name(enum drm_color_range range) { if (WARN_ON(range >= ARRAY_SIZE(color_range_name))) return "unknown"; return color_range_name[range]; } /** * drm_plane_create_color_properties - color encoding related plane properties * @plane: plane object * @supported_encodings: bitfield indicating supported color encodings * @supported_ranges: bitfileld indicating supported color ranges * @default_encoding: default color encoding * @default_range: default color range * * Create and attach plane specific COLOR_ENCODING and COLOR_RANGE * properties to @plane. The supported encodings and ranges should * be provided in supported_encodings and supported_ranges bitmasks. * Each bit set in the bitmask indicates that its number as enum * value is supported. 
*/ int drm_plane_create_color_properties(struct drm_plane *plane, u32 supported_encodings, u32 supported_ranges, enum drm_color_encoding default_encoding, enum drm_color_range default_range) { struct drm_device *dev = plane->dev; struct drm_property *prop; struct drm_prop_enum_list enum_list[MAX_T(int, DRM_COLOR_ENCODING_MAX, DRM_COLOR_RANGE_MAX)]; int i, len; if (WARN_ON(supported_encodings == 0 || (supported_encodings & -BIT(DRM_COLOR_ENCODING_MAX)) != 0 || (supported_encodings & BIT(default_encoding)) == 0)) return -EINVAL; if (WARN_ON(supported_ranges == 0 || (supported_ranges & -BIT(DRM_COLOR_RANGE_MAX)) != 0 || (supported_ranges & BIT(default_range)) == 0)) return -EINVAL; len = 0; for (i = 0; i < DRM_COLOR_ENCODING_MAX; i++) { if ((supported_encodings & BIT(i)) == 0) continue; enum_list[len].type = i; enum_list[len].name = color_encoding_name[i]; len++; } prop = drm_property_create_enum(dev, 0, "COLOR_ENCODING", enum_list, len); if (!prop) return -ENOMEM; plane->color_encoding_property = prop; drm_object_attach_property(&plane->base, prop, default_encoding); if (plane->state) plane->state->color_encoding = default_encoding; len = 0; for (i = 0; i < DRM_COLOR_RANGE_MAX; i++) { if ((supported_ranges & BIT(i)) == 0) continue; enum_list[len].type = i; enum_list[len].name = color_range_name[i]; len++; } prop = drm_property_create_enum(dev, 0, "COLOR_RANGE", enum_list, len); if (!prop) return -ENOMEM; plane->color_range_property = prop; drm_object_attach_property(&plane->base, prop, default_range); if (plane->state) plane->state->color_range = default_range; return 0; } EXPORT_SYMBOL(drm_plane_create_color_properties); /** * drm_color_lut_check - check validity of lookup table * @lut: property blob containing LUT to check * @tests: bitmask of tests to run * * Helper to check whether a userspace-provided lookup table is valid and * satisfies hardware requirements. Drivers pass a bitmask indicating which of * the tests in &drm_color_lut_tests should be performed. * * Returns 0 on success, -EINVAL on failure. */ int drm_color_lut_check(const struct drm_property_blob *lut, u32 tests) { const struct drm_color_lut *entry; int i; if (!lut || !tests) return 0; entry = lut->data; for (i = 0; i < drm_color_lut_size(lut); i++) { if (tests & DRM_COLOR_LUT_EQUAL_CHANNELS) { if (entry[i].red != entry[i].blue || entry[i].red != entry[i].green) { DRM_DEBUG_KMS("All LUT entries must have equal r/g/b\n"); return -EINVAL; } } if (i > 0 && tests & DRM_COLOR_LUT_NON_DECREASING) { if (entry[i].red < entry[i - 1].red || entry[i].green < entry[i - 1].green || entry[i].blue < entry[i - 1].blue) { DRM_DEBUG_KMS("LUT entries must never decrease.\n"); return -EINVAL; } } } return 0; } EXPORT_SYMBOL(drm_color_lut_check); |
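
/*
 * Editor's example (not part of the original drm_color_mgmt.c): a minimal
 * sketch of how a driver might wire up the properties documented above.
 * The helper names and the chosen LUT sizes (256/33) are illustrative
 * assumptions, not taken from any real driver; only the drm_* calls used
 * here are the DRM core APIs defined or documented in this file.
 */
static int example_crtc_color_init(struct drm_crtc *crtc)
{
	int ret;

	/* Expose the legacy gamma ramp; the core aliases it onto GAMMA_LUT. */
	ret = drm_mode_crtc_set_gamma_size(crtc, 256);
	if (ret)
		return ret;

	/* Attach DEGAMMA_LUT, CTM, GAMMA_LUT and the two size properties. */
	drm_crtc_enable_color_mgmt(crtc, 33, true, 256);
	return 0;
}

/* Could be called from a driver's atomic_check to reject unusable LUTs. */
static int example_validate_gamma(const struct drm_crtc_state *state)
{
	return drm_color_lut_check(state->gamma_lut,
				   DRM_COLOR_LUT_EQUAL_CHANNELS |
				   DRM_COLOR_LUT_NON_DECREASING);
}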
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H

#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>

struct timespec64;

/* some helper functions for xen and kvm pv clock sources */
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
			    struct pvclock_vcpu_time_info *vcpu,
			    struct timespec64 *ts);
void pvclock_resume(void);

void pvclock_touch_watchdogs(void);

static __always_inline
unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
{
	unsigned version = src->version & ~1;
	/* Make sure that the version is read before the data. */
	virt_rmb();
	return version;
}

static __always_inline
bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
			unsigned version)
{
	/* Make sure that the version is re-read after the data. */
	virt_rmb();
	return unlikely(version != src->version);
}

/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static __always_inline
u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u64 product;
#ifdef __i386__
	u32 tmp1, tmp2;
#else
	ulong tmp;
#endif

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

#ifdef __i386__
	__asm__ (
		"mul %5 ; "
		"mov %4,%%eax ; "
		"mov %%edx,%4 ; "
		"mul %5 ; "
		"xor %5,%5 ; "
		"add %4,%%eax ; "
		"adc %5,%%edx ; "
		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
	__asm__ (
		"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
		: [lo]"=a"(product), [hi]"=d"(tmp)
		: "0"(delta), [mul_frac]"rm"((u64)mul_frac));
#else
#error implement me!
#endif

	return product;
}

static __always_inline
u64 __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, u64 tsc)
{
	u64 delta = tsc - src->tsc_timestamp;
	u64 offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
					 src->tsc_shift);
	return src->system_time + offset;
}

struct pvclock_vsyscall_time_info {
	struct pvclock_vcpu_time_info pvti;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));

#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)

#ifdef CONFIG_PARAVIRT_CLOCK
void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti);
struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void);
#else
static inline struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void)
{
	return NULL;
}
#endif

#endif /* _ASM_X86_PVCLOCK_H */
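
/*
 * Editor's example (not part of pvclock.h): the inline helpers above are
 * meant to be used in a lockless retry loop, mirroring what
 * pvclock_clocksource_read() does in pvclock.c. example_pvclock_read() is a
 * hypothetical name; rdtsc_ordered() is assumed to be available via
 * <asm/msr.h>.
 */
static inline u64 example_pvclock_read(const struct pvclock_vcpu_time_info *src)
{
	unsigned version;
	u64 cycles;

	do {
		/* Snapshot the version, then read the fields it protects. */
		version = pvclock_read_begin(src);
		cycles = __pvclock_read_cycles(src, rdtsc_ordered());
		/* Retry if the hypervisor updated the record meanwhile. */
	} while (pvclock_read_retry(src, version));

	return cycles;
}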
// SPDX-License-Identifier: GPL-2.0
/*
 * Helpers for IOMMU drivers implementing SVA
 */
#include <linux/mmu_context.h>
#include <linux/mutex.h>
#include <linux/sched/mm.h>
#include <linux/iommu.h>

#include "iommu-priv.h"

static DEFINE_MUTEX(iommu_sva_lock);
static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
						   struct mm_struct *mm);

/* Allocate a PASID for the mm within range (inclusive) */
static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev)
{
	struct iommu_mm_data *iommu_mm;
	ioasid_t pasid;

	lockdep_assert_held(&iommu_sva_lock);

	if (!arch_pgtable_dma_compat(mm))
		return ERR_PTR(-EBUSY);

	iommu_mm = mm->iommu_mm;
	/* Is a PASID already associated with this mm? */
	if (iommu_mm) {
		if (iommu_mm->pasid >= dev->iommu->max_pasids)
			return ERR_PTR(-EOVERFLOW);
		return iommu_mm;
	}

	iommu_mm = kzalloc(sizeof(struct iommu_mm_data), GFP_KERNEL);
	if (!iommu_mm)
		return ERR_PTR(-ENOMEM);

	pasid = iommu_alloc_global_pasid(dev);
	if (pasid == IOMMU_PASID_INVALID) {
		kfree(iommu_mm);
		return ERR_PTR(-ENOSPC);
	}
	iommu_mm->pasid = pasid;
	INIT_LIST_HEAD(&iommu_mm->sva_domains);
	/*
	 * Make sure the write to mm->iommu_mm is not reordered in front of
	 * initialization to iommu_mm fields. If it does, readers may see a
	 * valid iommu_mm with uninitialized values.
	 */
	smp_store_release(&mm->iommu_mm, iommu_mm);
	return iommu_mm;
}

/**
 * iommu_sva_bind_device() - Bind a process address space to a device
 * @dev: the device
 * @mm: the mm to bind, caller must hold a reference to mm_users
 *
 * Create a bond between device and address space, allowing the device to
 * access the mm using the PASID returned by iommu_sva_get_pasid(). If a
 * bond already exists between @device and @mm, an additional internal
 * reference is taken. Caller must call iommu_sva_unbind_device()
 * to release each reference.
 *
 * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
 * initialize the required SVA features.
 *
 * On error, returns an ERR_PTR value.
 */
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
{
	struct iommu_group *group = dev->iommu_group;
	struct iommu_attach_handle *attach_handle;
	struct iommu_mm_data *iommu_mm;
	struct iommu_domain *domain;
	struct iommu_sva *handle;
	int ret;

	if (!group)
		return ERR_PTR(-ENODEV);

	mutex_lock(&iommu_sva_lock);

	/* Allocate mm->pasid if necessary.
*/ iommu_mm = iommu_alloc_mm_data(mm, dev); if (IS_ERR(iommu_mm)) { ret = PTR_ERR(iommu_mm); goto out_unlock; } /* A bond already exists, just take a reference`. */ attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA); if (!IS_ERR(attach_handle)) { handle = container_of(attach_handle, struct iommu_sva, handle); if (attach_handle->domain->mm != mm) { ret = -EBUSY; goto out_unlock; } refcount_inc(&handle->users); mutex_unlock(&iommu_sva_lock); return handle; } if (PTR_ERR(attach_handle) != -ENOENT) { ret = PTR_ERR(attach_handle); goto out_unlock; } handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) { ret = -ENOMEM; goto out_unlock; } /* Search for an existing domain. */ list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (!ret) { domain->users++; goto out; } } /* Allocate a new domain and set it on device pasid. */ domain = iommu_sva_domain_alloc(dev, mm); if (IS_ERR(domain)) { ret = PTR_ERR(domain); goto out_free_handle; } ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, &handle->handle); if (ret) goto out_free_domain; domain->users = 1; list_add(&domain->next, &mm->iommu_mm->sva_domains); out: refcount_set(&handle->users, 1); mutex_unlock(&iommu_sva_lock); handle->dev = dev; return handle; out_free_domain: iommu_domain_free(domain); out_free_handle: kfree(handle); out_unlock: mutex_unlock(&iommu_sva_lock); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(iommu_sva_bind_device); /** * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device * @handle: the handle returned by iommu_sva_bind_device() * * Put reference to a bond between device and address space. The device should * not be issuing any more transaction for this PASID. All outstanding page * requests for this PASID must have been flushed to the IOMMU. 
*/ void iommu_sva_unbind_device(struct iommu_sva *handle) { struct iommu_domain *domain = handle->handle.domain; struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); if (!refcount_dec_and_test(&handle->users)) { mutex_unlock(&iommu_sva_lock); return; } iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); iommu_domain_free(domain); } mutex_unlock(&iommu_sva_lock); kfree(handle); } EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); u32 iommu_sva_get_pasid(struct iommu_sva *handle) { struct iommu_domain *domain = handle->handle.domain; return mm_get_enqcmd_pasid(domain->mm); } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); void mm_pasid_drop(struct mm_struct *mm) { struct iommu_mm_data *iommu_mm = mm->iommu_mm; if (!iommu_mm) return; iommu_free_global_pasid(iommu_mm->pasid); kfree(iommu_mm); } /* * I/O page fault handler for SVA */ static enum iommu_page_response_code iommu_sva_handle_mm(struct iommu_fault *fault, struct mm_struct *mm) { vm_fault_t ret; struct vm_area_struct *vma; unsigned int access_flags = 0; unsigned int fault_flags = FAULT_FLAG_REMOTE; struct iommu_fault_page_request *prm = &fault->prm; enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) return status; if (!mmget_not_zero(mm)) return status; mmap_read_lock(mm); vma = vma_lookup(mm, prm->addr); if (!vma) /* Unmapped area */ goto out_put_mm; if (prm->perm & IOMMU_FAULT_PERM_READ) access_flags |= VM_READ; if (prm->perm & IOMMU_FAULT_PERM_WRITE) { access_flags |= VM_WRITE; fault_flags |= FAULT_FLAG_WRITE; } if (prm->perm & IOMMU_FAULT_PERM_EXEC) { access_flags |= VM_EXEC; fault_flags |= FAULT_FLAG_INSTRUCTION; } if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) fault_flags |= FAULT_FLAG_USER; if (access_flags & ~vma->vm_flags) /* Access fault */ goto out_put_mm; ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : IOMMU_PAGE_RESP_SUCCESS; out_put_mm: mmap_read_unlock(mm); mmput(mm); return status; } static void iommu_sva_handle_iopf(struct work_struct *work) { struct iopf_fault *iopf; struct iopf_group *group; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); list_for_each_entry(iopf, &group->faults, list) { /* * For the moment, errors are sticky: don't handle subsequent * faults in the group if there is an error. */ if (status != IOMMU_PAGE_RESP_SUCCESS) break; status = iommu_sva_handle_mm(&iopf->fault, group->attach_handle->domain->mm); } iopf_group_response(group, status); iopf_free_group(group); } static int iommu_sva_iopf_handler(struct iopf_group *group) { struct iommu_fault_param *fault_param = group->fault_param; INIT_WORK(&group->work, iommu_sva_handle_iopf); if (!queue_work(fault_param->queue->wq, &group->work)) return -EBUSY; return 0; } static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; if (ops->domain_alloc_sva) { domain = ops->domain_alloc_sva(dev, mm); if (IS_ERR(domain)) return domain; } else { domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); if (!domain) return ERR_PTR(-ENOMEM); } domain->type = IOMMU_DOMAIN_SVA; mmgrab(mm); domain->mm = mm; domain->owner = ops; domain->iopf_handler = iommu_sva_iopf_handler; return domain; } |
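
/*
 * Editor's example (not part of iommu-sva.c): typical device-driver usage of
 * the SVA API above. The function name and the "submit work" step are
 * illustrative assumptions; only the iommu_sva_* calls are real exported
 * interfaces from this file.
 */
static int example_sva_use_current_mm(struct device *dev)
{
	struct iommu_sva *handle;
	u32 pasid;

	/* Bind the current process address space to the device. */
	handle = iommu_sva_bind_device(dev, current->mm);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* Program this PASID into the device so its DMA targets current->mm. */
	pasid = iommu_sva_get_pasid(handle);

	/* ... submit device work tagged with @pasid here ... */

	/* Drop the bond once no more transactions use the PASID. */
	iommu_sva_unbind_device(handle);
	return 0;
}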
// SPDX-License-Identifier: GPL-2.0
#ifndef IORING_CANCEL_H
#define IORING_CANCEL_H

#include <linux/io_uring_types.h>

struct io_cancel_data {
	struct io_ring_ctx *ctx;
	union {
		u64 data;
		struct file *file;
	};
	u8 opcode;
	u32 flags;
	int seq;
};

int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags);

int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned int issue_flags);
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);

static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
{
	if (req->cancel_seq_set && sequence == req->work.cancel_seq)
		return true;

	req->cancel_seq_set = true;
	req->work.cancel_seq = sequence;
	return false;
}

#endif
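
/*
 * Editor's example (not part of cancel.h): how a caller might fill
 * struct io_cancel_data to cancel by user_data. The wrapper function is
 * hypothetical; IORING_ASYNC_CANCEL_ALL comes from the io_uring uapi header.
 */
static inline void example_fill_cancel_key(struct io_cancel_data *cd,
					   struct io_ring_ctx *ctx,
					   u64 user_data, bool cancel_all)
{
	cd->ctx = ctx;
	cd->data = user_data;	/* match requests with this sqe user_data */
	cd->opcode = 0;
	cd->flags = cancel_all ? IORING_ASYNC_CANCEL_ALL : 0;
	cd->seq = 0;		/* see io_cancel_match_sequence() above */
}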
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */

#include
<linux/unaligned.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <linux/dccp.h> #include <linux/sctp.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <net/tcp.h> struct nft_exthdr { u8 type; u8 offset; u8 len; u8 op; u8 dreg; u8 sreg; u8 flags; }; static unsigned int optlen(const u8 *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0) return 1; else return opt[offset + 1]; } static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len) { if (len % NFT_REG32_SIZE) dest[len / NFT_REG32_SIZE] = 0; return skb_copy_bits(skb, offset, dest, len); } static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; unsigned int offset = 0; int err; if (pkt->skb->protocol != htons(ETH_P_IPV6)) goto err; err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL); if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; } offset += priv->offset; if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: regs->verdict.code = NFT_BREAK; } /* find the offset to specified option. * * If target header is found, its offset is set in *offset and return option * number. Otherwise, return negative error. * * If the first fragment doesn't contain the End of Options it is considered * invalid. */ static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned int *offset, int target) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; unsigned int start; bool found = false; __be32 info; int optlen; iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (!iph) return -EBADMSG; start = sizeof(struct iphdr); optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); if (optlen <= 0) return -ENOENT; memset(opt, 0, sizeof(struct ip_options)); /* Copy the options since __ip_options_compile() modifies * the options. */ if (skb_copy_bits(skb, start, opt->__data, optlen)) return -EBADMSG; opt->optlen = optlen; if (__ip_options_compile(net, opt, NULL, &info)) return -EBADMSG; switch (target) { case IPOPT_SSRR: case IPOPT_LSRR: if (!opt->srr) break; found = target == IPOPT_SSRR ? opt->is_strictroute : !opt->is_strictroute; if (found) *offset = opt->srr + start; break; case IPOPT_RR: if (!opt->rr) break; *offset = opt->rr + start; found = true; break; case IPOPT_RA: if (!opt->router_alert) break; *offset = opt->router_alert + start; found = true; break; default: return -EOPNOTSUPP; } return found ? 
target : -ENOENT; } static void nft_exthdr_ipv4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; struct sk_buff *skb = pkt->skb; unsigned int offset; int err; if (skb->protocol != htons(ETH_P_IP)) goto err; err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type); if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, err >= 0); return; } else if (err < 0) { goto err; } offset += priv->offset; if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: regs->verdict.code = NFT_BREAK; } static void * nft_tcp_header_pointer(const struct nft_pktinfo *pkt, unsigned int len, void *buffer, unsigned int *tcphdr_len) { struct tcphdr *tcph; if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); if (!tcph) return NULL; *tcphdr_len = __tcp_hdrlen(tcph); if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len) return NULL; return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer); } static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, optl, tcphdr_len, offset; u32 *dest = ®s->data[priv->dreg]; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) goto err; offset = i + priv->offset; if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, 1); } else { if (priv->len % NFT_REG32_SIZE) dest[priv->len / NFT_REG32_SIZE] = 0; memcpy(dest, opt + offset, priv->len); } return; } err: if (priv->flags & NFT_EXTHDR_F_PRESENT) *dest = 0; else regs->verdict.code = NFT_BREAK; } static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, optl, tcphdr_len, offset; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) goto err; tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt)); opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { union { __be16 v16; __be32 v32; } old, new; optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) goto err; offset = i + priv->offset; switch (priv->len) { case 2: old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset)); new.v16 = (__force __be16)nft_reg_load16( ®s->data[priv->sreg]); switch (priv->type) { case TCPOPT_MSS: /* increase can cause connection to stall */ if (ntohs(old.v16) <= ntohs(new.v16)) return; break; } if (old.v16 == new.v16) return; put_unaligned(new.v16, (__be16*)(opt + offset)); inet_proto_csum_replace2(&tcph->check, pkt->skb, old.v16, new.v16, false); break; case 4: new.v32 = nft_reg_load_be32(®s->data[priv->sreg]); old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset)); if (old.v32 == new.v32) return; put_unaligned(new.v32, (__be32*)(opt + offset)); 
inet_proto_csum_replace4(&tcph->check, pkt->skb, old.v32, new.v32, false); break; default: WARN_ON_ONCE(1); break; } return; } return; err: regs->verdict.code = NFT_BREAK; } static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int i, tcphdr_len, optl; struct tcphdr *tcph; u8 *opt; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) goto drop; tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt)); opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { unsigned int j; optl = optlen(opt, i); if (priv->type != opt[i]) continue; if (i + optl > tcphdr_len) goto drop; for (j = 0; j < optl; ++j) { u16 n = TCPOPT_NOP; u16 o = opt[i+j]; if ((i + j) % 2 == 0) { o <<= 8; n <<= 8; } inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o), htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); return; } /* option not found, continue. This allows to do multiple * option removals per rule. */ return; err: regs->verdict.code = NFT_BREAK; return; drop: /* can't remove, no choice but to drop */ regs->verdict.code = NF_DROP; } static void nft_exthdr_sctp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr); struct nft_exthdr *priv = nft_expr_priv(expr); u32 *dest = ®s->data[priv->dreg]; const struct sctp_chunkhdr *sch; struct sctp_chunkhdr _sch; if (pkt->tprot != IPPROTO_SCTP) goto err; do { sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch); if (!sch || !sch->length) break; if (sch->type == priv->type) { if (priv->flags & NFT_EXTHDR_F_PRESENT) { nft_reg_store8(dest, true); return; } if (priv->offset + priv->len > ntohs(sch->length) || offset + ntohs(sch->length) > pkt->skb->len) break; if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset, dest, priv->len) < 0) break; return; } offset += SCTP_PAD4(ntohs(sch->length)); } while (offset < pkt->skb->len); err: if (priv->flags & NFT_EXTHDR_F_PRESENT) nft_reg_store8(dest, false); else regs->verdict.code = NFT_BREAK; } static void nft_exthdr_dccp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); unsigned int thoff, dataoff, optoff, optlen, i; u32 *dest = ®s->data[priv->dreg]; const struct dccp_hdr *dh; struct dccp_hdr _dh; if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff) goto err; thoff = nft_thoff(pkt); dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh); if (!dh) goto err; dataoff = dh->dccph_doff * sizeof(u32); optoff = __dccp_hdr_len(dh); if (dataoff <= optoff) goto err; optlen = dataoff - optoff; for (i = 0; i < optlen; ) { /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in * the length; the remaining options are at least 2B long. In * all cases, the first byte contains the option type. In * multi-byte options, the second byte contains the option * length, which must be at least two: 1 for the type plus 1 for * the length plus 0-253 for any following option data. We * aren't interested in the option data, only the type and the * length, so we don't need to read more than two bytes at a * time. 
*/ unsigned int buflen = optlen - i; u8 buf[2], *bufp; u8 type, len; if (buflen > sizeof(buf)) buflen = sizeof(buf); bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen, &buf); if (!bufp) goto err; type = bufp[0]; if (type == priv->type) { nft_reg_store8(dest, 1); return; } if (type <= DCCPO_MAX_RESERVED) { i++; continue; } if (buflen < 2) goto err; len = bufp[1]; if (len < 2) goto err; i += len; } err: *dest = 0; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, }; static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; if (!tb[NFTA_EXTHDR_DREG] || !tb[NFTA_EXTHDR_TYPE] || !tb[NFTA_EXTHDR_OFFSET] || !tb[NFTA_EXTHDR_LEN]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); if (err < 0) return err; err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); if (err < 0) return err; if (tb[NFTA_EXTHDR_FLAGS]) { err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags); if (err < 0) return err; if (flags & ~NFT_EXTHDR_F_PRESENT) return -EINVAL; } if (tb[NFTA_EXTHDR_OP]) { err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); if (err < 0) return err; } priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->flags = flags; priv->op = op; return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; int err; if (!tb[NFTA_EXTHDR_SREG] || !tb[NFTA_EXTHDR_TYPE] || !tb[NFTA_EXTHDR_OFFSET] || !tb[NFTA_EXTHDR_LEN]) return -EINVAL; if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS]) return -EINVAL; err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); if (err < 0) return err; err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); if (err < 0) return err; if (offset < 2) return -EOPNOTSUPP; switch (len) { case 2: break; case 4: break; default: return -EOPNOTSUPP; } err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); if (err < 0) return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; priv->flags = flags; priv->op = op; return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg, priv->len); } static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); if (tb[NFTA_EXTHDR_SREG] || tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS] || tb[NFTA_EXTHDR_OFFSET] || tb[NFTA_EXTHDR_LEN]) return -EINVAL; if (!tb[NFTA_EXTHDR_TYPE]) return -EINVAL; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->op = NFT_EXTHDR_OP_TCPOPT; return 0; } static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); int err = nft_exthdr_init(ctx, expr, tb); if (err < 0) 
return err; switch (priv->type) { case IPOPT_SSRR: case IPOPT_LSRR: case IPOPT_RR: case IPOPT_RA: break; default: return -EOPNOTSUPP; } return 0; } static int nft_exthdr_dccp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); int err = nft_exthdr_init(ctx, expr, tb); if (err < 0) return err; if (!(priv->flags & NFT_EXTHDR_F_PRESENT)) return -EOPNOTSUPP; return 0; } static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) return -1; return nft_exthdr_dump_common(skb, priv); } static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg)) return -1; return nft_exthdr_dump_common(skb, priv); } static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_exthdr *priv = nft_expr_priv(expr); return nft_exthdr_dump_common(skb, priv); } static bool nft_exthdr_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_exthdr *priv = nft_expr_priv(expr); const struct nft_exthdr *exthdr; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } exthdr = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->type != exthdr->type || priv->op != exthdr->op || priv->flags != exthdr->flags || priv->offset != exthdr->offset || priv->len != exthdr->len) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_ipv4_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv4_eval, .init = nft_exthdr_ipv4_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_tcp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_tcp_set_eval, .init = nft_exthdr_tcp_set_init, .dump = nft_exthdr_dump_set, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = 
nft_exthdr_tcp_strip_eval, .init = nft_exthdr_tcp_strip_init, .dump = nft_exthdr_dump_strip, .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_sctp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_sctp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_dccp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_dccp_eval, .init = nft_exthdr_dccp_init, .dump = nft_exthdr_dump, .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { u32 op; if (!tb[NFTA_EXTHDR_OP]) return &nft_exthdr_ipv6_ops; if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG]) return ERR_PTR(-EOPNOTSUPP); op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP])); switch (op) { case NFT_EXTHDR_OP_TCPOPT: if (tb[NFTA_EXTHDR_SREG]) return &nft_exthdr_tcp_set_ops; if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_tcp_ops; return &nft_exthdr_tcp_strip_ops; case NFT_EXTHDR_OP_IPV6: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv6_ops; break; case NFT_EXTHDR_OP_IPV4: if (ctx->family != NFPROTO_IPV6) { if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv4_ops; } break; case NFT_EXTHDR_OP_SCTP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_sctp_ops; break; case NFT_EXTHDR_OP_DCCP: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; } return ERR_PTR(-EOPNOTSUPP); } struct nft_expr_type nft_exthdr_type __read_mostly = { .name = "exthdr", .select_ops = nft_exthdr_select_ops, .policy = nft_exthdr_policy, .maxattr = NFTA_EXTHDR_MAX, .owner = THIS_MODULE, }; |
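
/*
 * Editor's note (not part of nft_exthdr.c): the expression above backs the
 * "exthdr", "ip option" and "tcp option" primitives of nft(8). For
 * orientation only, rules along these lines exercise it (exact syntax may
 * vary between nftables versions):
 *
 *	# IPv6 extension header presence check (nft_exthdr_ipv6_ops)
 *	nft add rule inet filter input exthdr frag exists counter
 *
 *	# TCP MSS clamping on SYN packets (nft_exthdr_tcp_set_ops)
 *	nft add rule ip filter forward tcp flags syn tcp option maxseg size set rt mtu
 */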
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll * * This abstraction carries sctp events to the ULP (sockets). * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Jon Grimm <jgrimm@us.ibm.com> * La Monte H.P. Yarroll <piggy@acm.org> * Sridhar Samudrala <sri@us.ibm.com> */ #include <linux/slab.h> #include <linux/types.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/busy_poll.h> #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *); static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *, struct sctp_ulpevent *); static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq); /* 1st Level Abstractions */ /* Initialize a ULP queue from a block of memory. */ void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc) { memset(ulpq, 0, sizeof(struct sctp_ulpq)); ulpq->asoc = asoc; skb_queue_head_init(&ulpq->reasm); skb_queue_head_init(&ulpq->reasm_uo); skb_queue_head_init(&ulpq->lobby); ulpq->pd_mode = 0; } /* Flush the reassembly and ordering queues. */ void sctp_ulpq_flush(struct sctp_ulpq *ulpq) { struct sk_buff *skb; struct sctp_ulpevent *event; while ((skb = __skb_dequeue(&ulpq->lobby)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) { event = sctp_skb2event(skb); sctp_ulpevent_free(event); } } /* Dispose of a ulpqueue. */ void sctp_ulpq_free(struct sctp_ulpq *ulpq) { sctp_ulpq_flush(ulpq); } /* Process an incoming DATA chunk. */ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sk_buff_head temp; struct sctp_ulpevent *event; int event_eor = 0; /* Create an event from the incoming chunk.
*/ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); if (!event) return -ENOMEM; event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; /* Do reassembly if needed. */ event = sctp_ulpq_reasm(ulpq, event); /* Do ordering if needed. */ if (event) { /* Create a temporary list to collect chunks on. */ skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { event_eor = (event->msg_flags & MSG_EOR) ? 1 : 0; sctp_ulpq_tail_event(ulpq, &temp); } return event_eor; } /* Add a new event for propagation to the ULP. */ /* Clear the partial delivery mode for this socket. Note: This * assumes that no association is currently in partial delivery mode. */ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) { struct sctp_sock *sp = sctp_sk(sk); if (atomic_dec_and_test(&sp->pd_mode)) { /* This means there are no other associations in PD, so * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); return 1; } } else { /* There are other associations in PD, so we only need to * pull stuff out of the lobby that belongs to the * associations that is exiting PD (all of its notifications * are posted here). */ if (!skb_queue_empty(&sp->pd_lobby) && asoc) { struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; sctp_skb_for_each(skb, &sp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == asoc) { __skb_unlink(skb, &sp->pd_lobby); __skb_queue_tail(&sk->sk_receive_queue, skb); } } } } return 0; } /* Set the pd_mode on the socket and ulpq */ static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq) { struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk); atomic_inc(&sp->pd_mode); ulpq->pd_mode = 1; } /* Clear the pd_mode and restart any pending messages waiting for delivery. */ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) { ulpq->pd_mode = 0; sctp_ulpq_reasm_drain(ulpq); return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc); } int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sk_buff_head *skb_list) { struct sock *sk = ulpq->asoc->base.sk; struct sctp_sock *sp = sctp_sk(sk); struct sctp_ulpevent *event; struct sk_buff_head *queue; struct sk_buff *skb; int clear_pd = 0; skb = __skb_peek(skb_list); event = sctp_skb2event(skb); /* If the socket is just going to throw this away, do not * even try to deliver it. */ if (sk->sk_shutdown & RCV_SHUTDOWN && (sk->sk_shutdown & SEND_SHUTDOWN || !sctp_ulpevent_is_notification(event))) goto out_free; if (!sctp_ulpevent_is_notification(event)) { sk_mark_napi_id(sk, skb); sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until * partial delivery is cleared, unless, of course _this_ is * the association the cause of the partial delivery. */ if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { /* If the association is in partial delivery, we * need to finish delivering the partially processed * packet before passing any other data. This is * because we don't truly support stream interleaving. 
*/ if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; } } else { /* * If fragment interleave is enabled, we * can queue this to the receive queue instead * of the lobby. */ if (sp->frag_interleave) queue = &sk->sk_receive_queue; else queue = &sp->pd_lobby; } } skb_queue_splice_tail_init(skb_list, queue); /* Did we just complete partial delivery and need to get * rolling again? Move pending data to the receive * queue. */ if (clear_pd) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { if (!sock_owned_by_user(sk)) sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } return 1; out_free: sctp_queue_purge_ulpevents(skb_list); return 0; } /* 2nd Level Abstractions */ /* Helper function to store chunks that need to be reassembled. */ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u32 tsn, ctsn; tsn = event->tsn; /* See if it belongs at the end. */ pos = skb_peek_tail(&ulpq->reasm); if (!pos) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Short circuit just dropping it at the end. */ cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(ctsn, tsn)) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by TSN. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; if (TSN_lt(tsn, ctsn)) break; } /* Insert before pos. */ __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } /* Helper function to return an event corresponding to the reassembled * datagram. * This routine creates a re-assembled skb given the first and last skb's * as stored in the reassembly queue. The skb's may be non-linear if the sctp * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; struct sctp_ulpevent *event; struct sk_buff *pnext, *last; struct sk_buff *list = skb_shinfo(f_frag)->frag_list; /* Store the pointer to the 2nd skb */ if (f_frag == l_frag) pos = NULL; else pos = f_frag->next; /* Get the last skb in the f_frag's frag_list if present. */ for (last = list; list; last = list, list = list->next) ; /* Add the list of remaining fragments to the first fragments * frag_list. */ if (last) last->next = pos; else { if (skb_cloned(f_frag)) { /* This is a cloned skb, we can't just modify * the frag_list. We need a new skb to do that. * Instead of calling skb_unshare(), we'll do it * ourselves since we need to delay the free. */ new = skb_copy(f_frag, GFP_ATOMIC); if (!new) return NULL; /* try again later */ sctp_skb_set_owner_r(new, f_frag->sk); skb_shinfo(new)->frag_list = pos; } else skb_shinfo(f_frag)->frag_list = pos; } /* Remove the first fragment from the reassembly queue. */ __skb_unlink(f_frag, queue); /* if we did unshare, then free the old skb and re-assign */ if (new) { kfree_skb(f_frag); f_frag = new; } while (pos) { pnext = pos->next; /* Update the len and data_len fields of the first fragment. */ f_frag->len += pos->len; f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. 
*/ __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) break; pos->next = pnext; pos = pnext; } event = sctp_skb2event(f_frag); SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } /* Helper function to check if an incoming chunk has filled up the last * missing fragment in a SCTP datagram and return the corresponding event. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ulpq) { struct sk_buff *pos; struct sctp_ulpevent *cevent; struct sk_buff *first_frag = NULL; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval = NULL; struct sk_buff *pd_first = NULL; struct sk_buff *pd_last = NULL; size_t pd_len = 0; struct sctp_association *asoc; u32 pd_point; /* Initialized to 0 just to avoid compiler warning message. Will * never be used with this value. It is referenced only after it * is set when we find the first fragment of a message. */ next_tsn = 0; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that complete a datagram. * 'first_frag' and next_tsn are reset when we find a chunk which * is the first fragment of a datagram. Once these 2 fields are set * we expect to find the remaining middle fragments and the last * fragment in order. If not, first_frag is reset to NULL and we * start the next pass when we find another first fragment. * * There is a potential to do partial delivery if user sets * SCTP_PARTIAL_DELIVERY_POINT option. Lets count some things here * to see if can do PD. */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: /* If this "FIRST_FRAG" is the first * element in the queue, then count it towards * possible PD. */ if (skb_queue_is_first(&ulpq->reasm, pos)) { pd_first = pos; pd_last = pos; pd_len = pos->len; } else { pd_first = NULL; pd_last = NULL; pd_len = 0; } first_frag = pos; next_tsn = ctsn + 1; break; case SCTP_DATA_MIDDLE_FRAG: if ((first_frag) && (ctsn == next_tsn)) { next_tsn++; if (pd_first) { pd_last = pos; pd_len += pos->len; } } else first_frag = NULL; break; case SCTP_DATA_LAST_FRAG: if (first_frag && (ctsn == next_tsn)) goto found; else first_frag = NULL; break; } } asoc = ulpq->asoc; if (pd_first) { /* Make sure we can enter partial deliver. * We can trigger partial delivery only if framgent * interleave is set, or the socket is not already * in partial delivery. */ if (!sctp_sk(asoc->base.sk)->frag_interleave && atomic_read(&sctp_sk(asoc->base.sk)->pd_mode)) goto done; cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm, pd_first, pd_last); if (retval) sctp_ulpq_set_pd(ulpq); } } done: return retval; found: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; } /* Retrieve the next set of fragments of a partial message. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; int is_last; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for the first * sequence of fragmented chunks. 
*/ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; is_last = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) return NULL; goto done; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else if (next_tsn == ctsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) first_frag = pos; else if (ctsn != next_tsn) goto done; last_frag = pos; is_last = 1; goto done; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; return retval; } /* Helper function to reassemble chunks. Hold chunks on the reasm queue that * need reassembling. */ static struct sctp_ulpevent *sctp_ulpq_reasm(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sctp_ulpevent *retval = NULL; /* Check if this is part of a fragmented message. */ if (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK)) { event->msg_flags |= MSG_EOR; return event; } sctp_ulpq_store_reasm(ulpq, event); if (!ulpq->pd_mode) retval = sctp_ulpq_retrieve_reassembled(ulpq); else { __u32 ctsn, ctsnap; /* Do not even bother unless this is the next tsn to * be delivered. */ ctsn = event->tsn; ctsnap = sctp_tsnmap_get_ctsn(&ulpq->asoc->peer.tsn_map); if (TSN_lte(ctsn, ctsnap)) retval = sctp_ulpq_retrieve_partial(ulpq); } return retval; } /* Retrieve the first part (sequential fragments) for partial delivery. */ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *last_frag, *first_frag; struct sctp_ulpevent *cevent; __u32 ctsn, next_tsn; struct sctp_ulpevent *retval; /* The chunks are held in the reasm queue sorted by TSN. * Walk through the queue sequentially and look for a sequence of * fragmented chunks that start a datagram. */ if (skb_queue_empty(&ulpq->reasm)) return NULL; last_frag = first_frag = NULL; retval = NULL; next_tsn = 0; skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: if (!first_frag) { first_frag = pos; next_tsn = ctsn + 1; last_frag = pos; } else goto done; break; case SCTP_DATA_MIDDLE_FRAG: if (!first_frag) return NULL; if (ctsn == next_tsn) { next_tsn++; last_frag = pos; } else goto done; break; case SCTP_DATA_LAST_FRAG: if (!first_frag) return NULL; else goto done; break; default: return NULL; } } /* We have the reassembled event. There is no need to look * further. */ done: retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm, first_frag, last_frag); return retval; } /* * Flush out stale fragments from the reassembly queue when processing * a Forward TSN. * * RFC 3758, Section 3.6 * * After receiving and processing a FORWARD TSN, the data receiver MUST * take cautions in updating its re-assembly queue. The receiver MUST * remove any partially reassembled message, which is still missing one * or more TSNs earlier than or equal to the new cumulative TSN point. 
* In the event that the receiver has invoked the partial delivery API, * a notification SHOULD also be generated to inform the upper layer API * that the message being partially delivered will NOT be completed. */ void sctp_ulpq_reasm_flushtsn(struct sctp_ulpq *ulpq, __u32 fwd_tsn) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *event; __u32 tsn; if (skb_queue_empty(&ulpq->reasm)) return; skb_queue_walk_safe(&ulpq->reasm, pos, tmp) { event = sctp_skb2event(pos); tsn = event->tsn; /* Since the entire message must be abandoned by the * sender (item A3 in Section 3.5, RFC 3758), we can * free all fragments on the list that are less then * or equal to ctsn_point */ if (TSN_lte(tsn, fwd_tsn)) { __skb_unlink(pos, &ulpq->reasm); sctp_ulpevent_free(event); } else break; } } /* * Drain the reassembly queue. If we just cleared parted delivery, it * is possible that the reassembly queue will contain already reassembled * messages. Retrieve any such messages and give them to the user. */ static void sctp_ulpq_reasm_drain(struct sctp_ulpq *ulpq) { struct sctp_ulpevent *event = NULL; if (skb_queue_empty(&ulpq->reasm)) return; while ((event = sctp_ulpq_retrieve_reassembled(ulpq)) != NULL) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); /* Do ordering if needed. */ if (event->msg_flags & MSG_EOR) event = sctp_ulpq_order(ulpq, event); /* Send event to the ULP. 'event' is the * sctp_ulpevent for very first SKB on the temp' list. */ if (event) sctp_ulpq_tail_event(ulpq, &temp); } } /* Helper function to gather skbs that have possibly become * ordered by an incoming chunk. */ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *stream; __u16 sid, csid, cssn; sid = event->stream; stream = &ulpq->asoc->stream; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; if (cssn != sctp_ssn_peek(stream, in, sid)) break; /* Found it, so mark in the stream. */ sctp_ssn_next(stream, in, sid); __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. */ __skb_queue_tail(event_list, pos); } } /* Helper function to store chunks needing ordering. */ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sk_buff *pos; struct sctp_ulpevent *cevent; __u16 sid, csid; __u16 ssn, cssn; pos = skb_peek_tail(&ulpq->lobby); if (!pos) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } sid = event->stream; ssn = event->ssn; cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (sid > csid) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } if ((sid == csid) && SSN_lt(cssn, ssn)) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; } /* Find the right place in this list. We store them by * stream ID and then by SSN. */ skb_queue_walk(&ulpq->lobby, pos) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid > sid) break; if (csid == sid && SSN_lt(ssn, cssn)) break; } /* Insert before pos. 
*/ __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *stream; /* Check if this message needs ordering. */ if (event->msg_flags & SCTP_DATA_UNORDERED) return event; /* Note: The stream ID must be verified before this routine. */ sid = event->stream; ssn = event->ssn; stream = &ulpq->asoc->stream; /* Is this the expected SSN for this stream ID? */ if (ssn != sctp_ssn_peek(stream, in, sid)) { /* We've received something out of order, so find where it * needs to be placed. We order by stream and then by SSN. */ sctp_ulpq_store_ordered(ulpq, event); return NULL; } /* Mark that the next chunk has been found. */ sctp_ssn_next(stream, in, sid); /* Go find any other chunks that were waiting for * ordering. */ sctp_ulpq_retrieve_ordered(ulpq, event); return event; } /* Helper function to gather skbs that have possibly become * ordered by forward tsn skipping their dependencies. */ static void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq, __u16 sid) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_ulpevent *event; struct sctp_stream *stream; struct sk_buff_head temp; struct sk_buff_head *lobby = &ulpq->lobby; __u16 csid, cssn; stream = &ulpq->asoc->stream; /* We are holding the chunks by stream, by SSN. */ skb_queue_head_init(&temp); event = NULL; sctp_skb_for_each(pos, lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; /* Have we gone too far? */ if (csid > sid) break; /* Have we not gone far enough? */ if (csid < sid) continue; /* see if this ssn has been marked by skipping */ if (!SSN_lt(cssn, sctp_ssn_peek(stream, in, csid))) break; __skb_unlink(pos, lobby); if (!event) /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); /* Attach all gathered skbs to the event. */ __skb_queue_tail(&temp, pos); } /* If we didn't reap any data, see if the next expected SSN * is next on the queue and if so, use that. */ if (event == NULL && pos != (struct sk_buff *)lobby) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; cssn = cevent->ssn; if (csid == sid && cssn == sctp_ssn_peek(stream, in, csid)) { sctp_ssn_next(stream, in, csid); __skb_unlink(pos, lobby); __skb_queue_tail(&temp, pos); event = sctp_skb2event(pos); } } /* Send event to the ULP. 'event' is the sctp_ulpevent for * very first SKB on the 'temp' list. */ if (event) { /* see if we have more ordered that we can deliver */ sctp_ulpq_retrieve_ordered(ulpq, event); sctp_ulpq_tail_event(ulpq, &temp); } } /* Skip over an SSN. This is used during the processing of * Forwared TSN chunk to skip over the abandoned ordered data */ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn) { struct sctp_stream *stream; /* Note: The stream ID must be verified before this routine. */ stream = &ulpq->asoc->stream; /* Is this an old SSN? If so ignore. */ if (SSN_lt(ssn, sctp_ssn_peek(stream, in, sid))) return; /* Mark that we are no longer expecting this SSN or lower. */ sctp_ssn_skip(stream, in, sid, ssn); /* Go find any other chunks that were waiting for * ordering and deliver them if needed. 
*/ sctp_ulpq_reap_ordered(ulpq, sid); } __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list, __u16 needed) { __u16 freed = 0; __u32 tsn, last_tsn; struct sk_buff *skb, *flist, *last; struct sctp_ulpevent *event; struct sctp_tsnmap *tsnmap; tsnmap = &ulpq->asoc->peer.tsn_map; while ((skb = skb_peek_tail(list)) != NULL) { event = sctp_skb2event(skb); tsn = event->tsn; /* Don't renege below the Cumulative TSN ACK Point. */ if (TSN_lte(tsn, sctp_tsnmap_get_ctsn(tsnmap))) break; /* Events in ordering queue may have multiple fragments * corresponding to additional TSNs. Sum the total * freed space; find the last TSN. */ freed += skb_headlen(skb); flist = skb_shinfo(skb)->frag_list; for (last = flist; flist; flist = flist->next) { last = flist; freed += skb_headlen(last); } if (last) last_tsn = sctp_skb2event(last)->tsn; else last_tsn = tsn; /* Unlink the event, then renege all applicable TSNs. */ __skb_unlink(skb, list); sctp_ulpevent_free(event); while (TSN_lte(tsn, last_tsn)) { sctp_tsnmap_renege(tsnmap, tsn); tsn++; } if (freed >= needed) return freed; } return freed; } /* Renege 'needed' bytes from the ordering queue. */ static __u16 sctp_ulpq_renege_order(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->lobby, needed); } /* Renege 'needed' bytes from the reassembly queue. */ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) { return sctp_ulpq_renege_list(ulpq, &ulpq->reasm, needed); } /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_association *asoc; struct sctp_sock *sp; __u32 ctsn; struct sk_buff *skb; asoc = ulpq->asoc; sp = sctp_sk(asoc->base.sk); /* If the association is already in Partial Delivery mode * we have nothing to do. */ if (ulpq->pd_mode) return; /* Data must be at or below the Cumulative TSN ACK Point to * start partial delivery. */ skb = skb_peek(&asoc->ulpq.reasm); if (skb != NULL) { ctsn = sctp_skb2event(skb)->tsn; if (!TSN_lte(ctsn, sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map))) return; } /* If the user enabled fragment interleave socket option, * multiple associations can enter partial delivery. * Otherwise, we can only enter partial delivery if the * socket is not in partial deliver mode. */ if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) { /* Is partial delivery possible? */ event = sctp_ulpq_retrieve_first(ulpq); /* Send event to the ULP. */ if (event) { struct sk_buff_head temp; skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); sctp_ulpq_tail_event(ulpq, &temp); sctp_ulpq_set_pd(ulpq); return; } } } /* Renege some packets to make room for an incoming chunk. */ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_association *asoc = ulpq->asoc; __u32 freed = 0; __u16 needed; needed = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_data_chunk); if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) { freed = sctp_ulpq_renege_order(ulpq, needed); if (freed < needed) freed += sctp_ulpq_renege_frags(ulpq, needed - freed); } /* If able to free enough room, accept this chunk. */ if (sk_rmem_schedule(asoc->base.sk, chunk->skb, needed) && freed >= needed) { int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp); /* * Enter partial delivery if chunk has not been * delivered; otherwise, drain the reassembly queue. 
*/ if (retval <= 0) sctp_ulpq_partial_delivery(ulpq, gfp); else if (retval == 1) sctp_ulpq_reasm_drain(ulpq); } } /* Notify the application if an association is aborted and in * partial delivery mode. Send up any pending received messages. */ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sctp_sock *sp; struct sock *sk; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(ulpq->asoc->subscribe, SCTP_PARTIAL_DELIVERY_EVENT)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED, 0, 0, 0, gfp); if (ev) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } } |
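/*
 * Minimal usage sketch (kernel context assumed, not part of ulpqueue.c):
 * how the ULP queue API above is typically driven for an incoming DATA
 * chunk. In the real stack the queue is embedded in the association and
 * initialized once with sctp_ulpq_init(); the function name
 * example_rx_data() and the GFP flag choice are illustrative only.
 */
static void example_rx_data(struct sctp_association *asoc,
			    struct sctp_chunk *chunk)
{
	struct sctp_ulpq *ulpq = &asoc->ulpq;
	int delivered;

	/* Reassemble/order the chunk and, if complete, push it to the socket. */
	delivered = sctp_ulpq_tail_data(ulpq, chunk, GFP_ATOMIC);
	if (delivered < 0)
		return;	/* -ENOMEM: no event could be allocated */

	/* On association teardown the queues are flushed and released:
	 * sctp_ulpq_free(ulpq);
	 */
}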
/* * include/linux/topology.h * * Written by: Matthew Dobson, IBM Corporation * * Copyright (C) 2002, IBM Corp. * * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Send feedback to <colpatch@us.ibm.com> */ #ifndef _LINUX_TOPOLOGY_H #define _LINUX_TOPOLOGY_H #include <linux/arch_topology.h> #include <linux/cpumask.h> #include <linux/bitops.h> #include <linux/mmzone.h> #include <linux/smp.h> #include <linux/percpu.h> #include <asm/topology.h> #ifndef nr_cpus_node #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif #define for_each_node_with_cpus(node) \ for_each_online_node(node) \ if (nr_cpus_node(node)) int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 #define REMOTE_DISTANCE 20 #define DISTANCE_BITS 8 #ifndef node_distance #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif #ifndef RECLAIM_DISTANCE /* * If the distance between nodes in a system is larger than RECLAIM_DISTANCE * (in whatever arch specific measurement units returned by node_distance()) * and node_reclaim_mode is enabled then the VM will only call node_reclaim() * on nodes within this distance. */ #define RECLAIM_DISTANCE 30 #endif /* * The following tunable allows platforms to override the default node * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are * sufficiently fast that the default value actually hurts * performance. * * AMD EPYC machines use this because even though the 2-hop distance * is 32 (3.2x slower than a local memory access) performance actually * *improves* if allowed to reclaim memory and load balance tasks * between NUMA nodes 2-hops apart.
*/ extern int __read_mostly node_reclaim_distance; #ifndef PENALTY_FOR_NODE_WITH_CPUS #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DECLARE_PER_CPU(int, numa_node); #ifndef numa_node_id /* Returns the number of the current Node. */ static inline int numa_node_id(void) { return raw_cpu_read(numa_node); } #endif #ifndef cpu_to_node static inline int cpu_to_node(int cpu) { return per_cpu(numa_node, cpu); } #endif #ifndef set_numa_node static inline void set_numa_node(int node) { this_cpu_write(numa_node, node); } #endif #ifndef set_cpu_numa_node static inline void set_cpu_numa_node(int cpu, int node) { per_cpu(numa_node, cpu) = node; } #endif #else /* !CONFIG_USE_PERCPU_NUMA_NODE_ID */ /* Returns the number of the current Node. */ #ifndef numa_node_id static inline int numa_node_id(void) { return cpu_to_node(raw_smp_processor_id()); } #endif #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ #ifdef CONFIG_HAVE_MEMORYLESS_NODES /* * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). */ DECLARE_PER_CPU(int, _numa_mem_); #ifndef set_numa_mem static inline void set_numa_mem(int node) { this_cpu_write(_numa_mem_, node); } #endif #ifndef numa_mem_id /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { return raw_cpu_read(_numa_mem_); } #endif #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { return per_cpu(_numa_mem_, cpu); } #endif #ifndef set_cpu_numa_mem static inline void set_cpu_numa_mem(int cpu, int node) { per_cpu(_numa_mem_, cpu) = node; } #endif #else /* !CONFIG_HAVE_MEMORYLESS_NODES */ #ifndef numa_mem_id /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { return numa_node_id(); } #endif #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { return cpu_to_node(cpu); } #endif #endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ #if defined(topology_die_id) && defined(topology_die_cpumask) #define TOPOLOGY_DIE_SYSFS #endif #if defined(topology_cluster_id) && defined(topology_cluster_cpumask) #define TOPOLOGY_CLUSTER_SYSFS #endif #if defined(topology_book_id) && defined(topology_book_cpumask) #define TOPOLOGY_BOOK_SYSFS #endif #if defined(topology_drawer_id) && defined(topology_drawer_cpumask) #define TOPOLOGY_DRAWER_SYSFS #endif #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_die_id #define topology_die_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_cluster_id #define topology_cluster_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif #ifndef topology_book_id #define topology_book_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_drawer_id #define topology_drawer_id(cpu) ((void)(cpu), -1) #endif #ifndef topology_ppin #define topology_ppin(cpu) ((void)(cpu), 0ull) #endif #ifndef topology_sibling_cpumask #define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_cluster_cpumask #define topology_cluster_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_die_cpumask #define topology_die_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_book_cpumask #define topology_book_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_drawer_cpumask #define 
topology_drawer_cpumask(cpu) cpumask_of(cpu) #endif #if defined(CONFIG_SCHED_SMT) && !defined(cpu_smt_mask) static inline const struct cpumask *cpu_smt_mask(int cpu) { return topology_sibling_cpumask(cpu); } #endif static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); } #ifdef CONFIG_NUMA int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node); extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops); #else static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) { return cpumask_nth_and(cpu, cpus, cpu_online_mask); } static inline const struct cpumask * sched_numa_hop_mask(unsigned int node, unsigned int hops) { return ERR_PTR(-EOPNOTSUPP); } #endif /* CONFIG_NUMA */ /** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. * @mask: the iteration variable. * @node: the NUMA node to start the search from. * * Requires rcu_lock to be held. * * Yields cpu_online_mask for @node == NUMA_NO_NODE. */ #define for_each_numa_hop_mask(mask, node) \ for (unsigned int __hops = 0; \ mask = (node != NUMA_NO_NODE || __hops) ? \ sched_numa_hop_mask(node, __hops) : \ cpu_online_mask, \ !IS_ERR_OR_NULL(mask); \ __hops++) #endif /* _LINUX_TOPOLOGY_H */ |
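/*
 * Minimal sketch (kernel context assumed, not part of topology.h): using
 * for_each_numa_hop_mask() as documented above to pick an online CPU as
 * close as possible to a given NUMA node. The function name
 * pick_cpu_near_node() is hypothetical; the rcu_read_lock() is required by
 * the iterator per its kernel-doc.
 */
static int pick_cpu_near_node(int node)
{
	const struct cpumask *mask;
	unsigned int cpu = nr_cpu_ids;

	rcu_read_lock();
	for_each_numa_hop_mask(mask, node) {
		/* Masks are yielded in order of increasing NUMA distance. */
		cpu = cpumask_first_and(mask, cpu_online_mask);
		if (cpu < nr_cpu_ids)
			break;
	}
	rcu_read_unlock();

	return cpu < nr_cpu_ids ? (int)cpu : -ENODEV;
}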
/* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission.
The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/uaccess.h> #include <drm/drm_drv.h> #include <drm/drm_encoder.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> #include <drm/drm_managed.h> #include <drm/drm_mode_config.h> #include <drm/drm_print.h> #include <linux/dma-resv.h> #include "drm_crtc_internal.h" #include "drm_internal.h" int drm_modeset_register_all(struct drm_device *dev) { int ret; ret = drm_plane_register_all(dev); if (ret) goto err_plane; ret = drm_crtc_register_all(dev); if (ret) goto err_crtc; ret = drm_encoder_register_all(dev); if (ret) goto err_encoder; ret = drm_connector_register_all(dev); if (ret) goto err_connector; return 0; err_connector: drm_encoder_unregister_all(dev); err_encoder: drm_crtc_unregister_all(dev); err_crtc: drm_plane_unregister_all(dev); err_plane: return ret; } void drm_modeset_unregister_all(struct drm_device *dev) { drm_connector_unregister_all(dev); drm_encoder_unregister_all(dev); drm_crtc_unregister_all(dev); drm_plane_unregister_all(dev); } /** * drm_mode_getresources - get graphics configuration * @dev: drm device for the ioctl * @data: data pointer for the ioctl * @file_priv: drm file for the ioctl call * * Construct a set of configuration description structures and return * them to the user, including CRTC, connector and framebuffer configuration. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. 
*/ int drm_mode_getresources(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_card_res *card_res = data; struct drm_framebuffer *fb; struct drm_connector *connector; struct drm_crtc *crtc; struct drm_encoder *encoder; int count, ret = 0; uint32_t __user *fb_id; uint32_t __user *crtc_id; uint32_t __user *connector_id; uint32_t __user *encoder_id; struct drm_connector_list_iter conn_iter; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; mutex_lock(&file_priv->fbs_lock); count = 0; fb_id = u64_to_user_ptr(card_res->fb_id_ptr); list_for_each_entry(fb, &file_priv->fbs, filp_head) { if (count < card_res->count_fbs && put_user(fb->base.id, fb_id + count)) { mutex_unlock(&file_priv->fbs_lock); return -EFAULT; } count++; } card_res->count_fbs = count; mutex_unlock(&file_priv->fbs_lock); card_res->max_height = dev->mode_config.max_height; card_res->min_height = dev->mode_config.min_height; card_res->max_width = dev->mode_config.max_width; card_res->min_width = dev->mode_config.min_width; count = 0; crtc_id = u64_to_user_ptr(card_res->crtc_id_ptr); drm_for_each_crtc(crtc, dev) { if (drm_lease_held(file_priv, crtc->base.id)) { if (count < card_res->count_crtcs && put_user(crtc->base.id, crtc_id + count)) return -EFAULT; count++; } } card_res->count_crtcs = count; count = 0; encoder_id = u64_to_user_ptr(card_res->encoder_id_ptr); drm_for_each_encoder(encoder, dev) { if (count < card_res->count_encoders && put_user(encoder->base.id, encoder_id + count)) return -EFAULT; count++; } card_res->count_encoders = count; drm_connector_list_iter_begin(dev, &conn_iter); count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); /* * FIXME: the connectors on the list may not be fully initialized yet, * if the ioctl is called before the connectors are registered. (See * drm_dev_register()->drm_modeset_register_all() for static and * drm_connector_dynamic_register() for dynamic connectors.) * The driver should only get registered after static connectors are * fully initialized and dynamic connectors should be added to the * connector list only after fully initializing them. */ drm_for_each_connector_iter(connector, &conn_iter) { /* only expose writeback connectors if userspace understands them */ if (!file_priv->writeback_connectors && (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)) continue; if (drm_lease_held(file_priv, connector->base.id)) { if (count < card_res->count_connectors && put_user(connector->base.id, connector_id + count)) { drm_connector_list_iter_end(&conn_iter); return -EFAULT; } count++; } } card_res->count_connectors = count; drm_connector_list_iter_end(&conn_iter); return ret; } /** * drm_mode_config_reset - call ->reset callbacks * @dev: drm device * * This functions calls all the crtc's, encoder's and connector's ->reset * callback. Drivers can use this in e.g. their driver load or resume code to * reset hardware and software state. 
*/ void drm_mode_config_reset(struct drm_device *dev) { struct drm_crtc *crtc; struct drm_plane *plane; struct drm_encoder *encoder; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; drm_for_each_plane(plane, dev) if (plane->funcs->reset) plane->funcs->reset(plane); drm_for_each_crtc(crtc, dev) if (crtc->funcs->reset) crtc->funcs->reset(crtc); drm_for_each_encoder(encoder, dev) if (encoder->funcs && encoder->funcs->reset) encoder->funcs->reset(encoder); drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) if (connector->funcs->reset) connector->funcs->reset(connector); drm_connector_list_iter_end(&conn_iter); } EXPORT_SYMBOL(drm_mode_config_reset); /* * Global properties */ static const struct drm_prop_enum_list drm_plane_type_enum_list[] = { { DRM_PLANE_TYPE_OVERLAY, "Overlay" }, { DRM_PLANE_TYPE_PRIMARY, "Primary" }, { DRM_PLANE_TYPE_CURSOR, "Cursor" }, }; static int drm_mode_create_standard_properties(struct drm_device *dev) { struct drm_property *prop; int ret; ret = drm_connector_create_standard_properties(dev); if (ret) return ret; prop = drm_property_create_enum(dev, DRM_MODE_PROP_IMMUTABLE, "type", drm_plane_type_enum_list, ARRAY_SIZE(drm_plane_type_enum_list)); if (!prop) return -ENOMEM; dev->mode_config.plane_type_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_X", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_x = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_Y", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_y = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_W", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_w = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "SRC_H", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_src_h = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_X", INT_MIN, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_x = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_Y", INT_MIN, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_y = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_W", 0, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_w = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "CRTC_H", 0, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_h = prop; prop = drm_property_create_object(dev, DRM_MODE_PROP_ATOMIC, "FB_ID", DRM_MODE_OBJECT_FB); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_id = prop; prop = drm_property_create_signed_range(dev, DRM_MODE_PROP_ATOMIC, "IN_FENCE_FD", -1, INT_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_in_fence_fd = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_ATOMIC, "OUT_FENCE_PTR", 0, U64_MAX); if (!prop) return -ENOMEM; dev->mode_config.prop_out_fence_ptr = prop; prop = drm_property_create_object(dev, DRM_MODE_PROP_ATOMIC, "CRTC_ID", DRM_MODE_OBJECT_CRTC); if (!prop) return -ENOMEM; dev->mode_config.prop_crtc_id = prop; prop = drm_property_create(dev, DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, "FB_DAMAGE_CLIPS", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_fb_damage_clips = prop; prop = drm_property_create_bool(dev, DRM_MODE_PROP_ATOMIC, "ACTIVE"); if (!prop) return -ENOMEM; dev->mode_config.prop_active = prop; prop = drm_property_create(dev, DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_BLOB, 
"MODE_ID", 0); if (!prop) return -ENOMEM; dev->mode_config.prop_mode_id = prop; prop = drm_property_create_bool(dev, 0, "VRR_ENABLED"); if (!prop) return -ENOMEM; dev->mode_config.prop_vrr_enabled = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "DEGAMMA_LUT", 0); if (!prop) return -ENOMEM; dev->mode_config.degamma_lut_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "DEGAMMA_LUT_SIZE", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.degamma_lut_size_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "CTM", 0); if (!prop) return -ENOMEM; dev->mode_config.ctm_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_BLOB, "GAMMA_LUT", 0); if (!prop) return -ENOMEM; dev->mode_config.gamma_lut_property = prop; prop = drm_property_create_range(dev, DRM_MODE_PROP_IMMUTABLE, "GAMMA_LUT_SIZE", 0, UINT_MAX); if (!prop) return -ENOMEM; dev->mode_config.gamma_lut_size_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_BLOB, "IN_FORMATS", 0); if (!prop) return -ENOMEM; dev->mode_config.modifiers_property = prop; prop = drm_property_create(dev, DRM_MODE_PROP_IMMUTABLE | DRM_MODE_PROP_BLOB, "SIZE_HINTS", 0); if (!prop) return -ENOMEM; dev->mode_config.size_hints_property = prop; return 0; } static void drm_mode_config_init_release(struct drm_device *dev, void *ptr) { drm_mode_config_cleanup(dev); } /** * drmm_mode_config_init - managed DRM mode_configuration structure * initialization * @dev: DRM device * * Initialize @dev's mode_config structure, used for tracking the graphics * configuration of @dev. * * Since this initializes the modeset locks, no locking is possible. Which is no * problem, since this should happen single threaded at init time. It is the * driver's problem to ensure this guarantee. * * Cleanup is automatically handled through registering drm_mode_config_cleanup * with drmm_add_action(). * * Returns: 0 on success, negative error value on failure. 
*/ int drmm_mode_config_init(struct drm_device *dev) { int ret; mutex_init(&dev->mode_config.mutex); drm_modeset_lock_init(&dev->mode_config.connection_mutex); mutex_init(&dev->mode_config.idr_mutex); mutex_init(&dev->mode_config.fb_lock); mutex_init(&dev->mode_config.blob_lock); INIT_LIST_HEAD(&dev->mode_config.fb_list); INIT_LIST_HEAD(&dev->mode_config.crtc_list); INIT_LIST_HEAD(&dev->mode_config.connector_list); INIT_LIST_HEAD(&dev->mode_config.encoder_list); INIT_LIST_HEAD(&dev->mode_config.property_list); INIT_LIST_HEAD(&dev->mode_config.property_blob_list); INIT_LIST_HEAD(&dev->mode_config.plane_list); INIT_LIST_HEAD(&dev->mode_config.privobj_list); idr_init_base(&dev->mode_config.object_idr, 1); idr_init_base(&dev->mode_config.tile_idr, 1); ida_init(&dev->mode_config.connector_ida); spin_lock_init(&dev->mode_config.connector_list_lock); init_llist_head(&dev->mode_config.connector_free_list); INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn); ret = drm_mode_create_standard_properties(dev); if (ret) { drm_mode_config_cleanup(dev); return ret; } /* Just to be sure */ dev->mode_config.num_fb = 0; dev->mode_config.num_connector = 0; dev->mode_config.num_crtc = 0; dev->mode_config.num_encoder = 0; dev->mode_config.num_total_plane = 0; if (IS_ENABLED(CONFIG_LOCKDEP)) { struct drm_modeset_acquire_ctx modeset_ctx; struct ww_acquire_ctx resv_ctx; struct dma_resv resv; int ret; dma_resv_init(&resv); drm_modeset_acquire_init(&modeset_ctx, 0); ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &modeset_ctx); if (ret == -EDEADLK) ret = drm_modeset_backoff(&modeset_ctx); might_fault(); ww_acquire_init(&resv_ctx, &reservation_ww_class); ret = dma_resv_lock(&resv, &resv_ctx); if (ret == -EDEADLK) dma_resv_lock_slow(&resv, &resv_ctx); dma_resv_unlock(&resv); ww_acquire_fini(&resv_ctx); drm_modeset_drop_locks(&modeset_ctx); drm_modeset_acquire_fini(&modeset_ctx); dma_resv_fini(&resv); } return drmm_add_action_or_reset(dev, drm_mode_config_init_release, NULL); } EXPORT_SYMBOL(drmm_mode_config_init); /** * drm_mode_config_cleanup - free up DRM mode_config info * @dev: DRM device * * Free up all the connectors and CRTCs associated with this DRM device, then * free up the framebuffers and associated buffer objects. * * Note that since this /should/ happen single-threaded at driver/device * teardown time, no locking is required. It's the driver's job to ensure that * this guarantee actually holds true. * * FIXME: With the managed drmm_mode_config_init() it is no longer necessary for * drivers to explicitly call this function. */ void drm_mode_config_cleanup(struct drm_device *dev) { struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct drm_crtc *crtc, *ct; struct drm_encoder *encoder, *enct; struct drm_framebuffer *fb, *fbt; struct drm_property *property, *pt; struct drm_property_blob *blob, *bt; struct drm_plane *plane, *plt; list_for_each_entry_safe(encoder, enct, &dev->mode_config.encoder_list, head) { encoder->funcs->destroy(encoder); } drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { /* drm_connector_list_iter holds an full reference to the * current connector itself, which means it is inherently safe * against unreferencing the current connector - but not against * deleting it right away. */ drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); /* connector_iter drops references in a work item. 
*/ flush_work(&dev->mode_config.connector_free_work); if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) { drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) DRM_ERROR("connector %s leaked!\n", connector->name); drm_connector_list_iter_end(&conn_iter); } list_for_each_entry_safe(property, pt, &dev->mode_config.property_list, head) { drm_property_destroy(dev, property); } list_for_each_entry_safe(plane, plt, &dev->mode_config.plane_list, head) { plane->funcs->destroy(plane); } list_for_each_entry_safe(crtc, ct, &dev->mode_config.crtc_list, head) { crtc->funcs->destroy(crtc); } list_for_each_entry_safe(blob, bt, &dev->mode_config.property_blob_list, head_global) { drm_property_blob_put(blob); } /* * Single-threaded teardown context, so it's not required to grab the * fb_lock to protect against concurrent fb_list access. Contrary, it * would actually deadlock with the drm_framebuffer_cleanup function. * * Also, if there are any framebuffers left, that's a driver leak now, * so politely WARN about this. */ WARN_ON(!list_empty(&dev->mode_config.fb_list)); list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) { struct drm_printer p = drm_dbg_printer(dev, DRM_UT_KMS, "[leaked fb]"); drm_printf(&p, "framebuffer[%u]:\n", fb->base.id); drm_framebuffer_print_info(&p, 1, fb); drm_framebuffer_free(&fb->base.refcount); } ida_destroy(&dev->mode_config.connector_ida); idr_destroy(&dev->mode_config.tile_idr); idr_destroy(&dev->mode_config.object_idr); drm_modeset_lock_fini(&dev->mode_config.connection_mutex); } EXPORT_SYMBOL(drm_mode_config_cleanup); static u32 full_encoder_mask(struct drm_device *dev) { struct drm_encoder *encoder; u32 encoder_mask = 0; drm_for_each_encoder(encoder, dev) encoder_mask |= drm_encoder_mask(encoder); return encoder_mask; } /* * For some reason we want the encoder itself included in * possible_clones. Make life easy for drivers by allowing them * to leave possible_clones unset if no cloning is possible. */ static void fixup_encoder_possible_clones(struct drm_encoder *encoder) { if (encoder->possible_clones == 0) encoder->possible_clones = drm_encoder_mask(encoder); } static void validate_encoder_possible_clones(struct drm_encoder *encoder) { struct drm_device *dev = encoder->dev; u32 encoder_mask = full_encoder_mask(dev); struct drm_encoder *other; drm_for_each_encoder(other, dev) { WARN(!!(encoder->possible_clones & drm_encoder_mask(other)) != !!(other->possible_clones & drm_encoder_mask(encoder)), "possible_clones mismatch: " "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x vs. 
" "[ENCODER:%d:%s] mask=0x%x possible_clones=0x%x\n", encoder->base.id, encoder->name, drm_encoder_mask(encoder), encoder->possible_clones, other->base.id, other->name, drm_encoder_mask(other), other->possible_clones); } WARN((encoder->possible_clones & drm_encoder_mask(encoder)) == 0 || (encoder->possible_clones & ~encoder_mask) != 0, "Bogus possible_clones: " "[ENCODER:%d:%s] possible_clones=0x%x (full encoder mask=0x%x)\n", encoder->base.id, encoder->name, encoder->possible_clones, encoder_mask); } static u32 full_crtc_mask(struct drm_device *dev) { struct drm_crtc *crtc; u32 crtc_mask = 0; drm_for_each_crtc(crtc, dev) crtc_mask |= drm_crtc_mask(crtc); return crtc_mask; } static void validate_encoder_possible_crtcs(struct drm_encoder *encoder) { u32 crtc_mask = full_crtc_mask(encoder->dev); WARN((encoder->possible_crtcs & crtc_mask) == 0 || (encoder->possible_crtcs & ~crtc_mask) != 0, "Bogus possible_crtcs: " "[ENCODER:%d:%s] possible_crtcs=0x%x (full crtc mask=0x%x)\n", encoder->base.id, encoder->name, encoder->possible_crtcs, crtc_mask); } void drm_mode_config_validate(struct drm_device *dev) { struct drm_encoder *encoder; struct drm_crtc *crtc; struct drm_plane *plane; u32 primary_with_crtc = 0, cursor_with_crtc = 0; unsigned int num_primary = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return; drm_for_each_encoder(encoder, dev) fixup_encoder_possible_clones(encoder); drm_for_each_encoder(encoder, dev) { validate_encoder_possible_clones(encoder); validate_encoder_possible_crtcs(encoder); } drm_for_each_crtc(crtc, dev) { WARN(!crtc->primary, "Missing primary plane on [CRTC:%d:%s]\n", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_set, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_set func", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_set2, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_set2 func", crtc->base.id, crtc->name); WARN(crtc->cursor && crtc->funcs->cursor_move, "[CRTC:%d:%s] must not have both a cursor plane and a cursor_move func", crtc->base.id, crtc->name); if (crtc->primary) { WARN(!(crtc->primary->possible_crtcs & drm_crtc_mask(crtc)), "Bogus primary plane possible_crtcs: [PLANE:%d:%s] must be compatible with [CRTC:%d:%s]\n", crtc->primary->base.id, crtc->primary->name, crtc->base.id, crtc->name); WARN(primary_with_crtc & drm_plane_mask(crtc->primary), "Primary plane [PLANE:%d:%s] used for multiple CRTCs", crtc->primary->base.id, crtc->primary->name); primary_with_crtc |= drm_plane_mask(crtc->primary); } if (crtc->cursor) { WARN(!(crtc->cursor->possible_crtcs & drm_crtc_mask(crtc)), "Bogus cursor plane possible_crtcs: [PLANE:%d:%s] must be compatible with [CRTC:%d:%s]\n", crtc->cursor->base.id, crtc->cursor->name, crtc->base.id, crtc->name); WARN(cursor_with_crtc & drm_plane_mask(crtc->cursor), "Cursor plane [PLANE:%d:%s] used for multiple CRTCs", crtc->cursor->base.id, crtc->cursor->name); cursor_with_crtc |= drm_plane_mask(crtc->cursor); } } drm_for_each_plane(plane, dev) { if (plane->type == DRM_PLANE_TYPE_PRIMARY) num_primary++; } WARN(num_primary != dev->mode_config.num_crtc, "Must have as many primary planes as there are CRTCs, but have %u primary planes and %u CRTCs", num_primary, dev->mode_config.num_crtc); } |
8 2 4 4 8 5 3 8 15 14 5 4 7 10 5 7 13 4 13 7 6 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 | /* * xxHash - Extremely Fast Hash algorithm * Copyright (C) 2012-2016, Yann Collet. * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. This program is dual-licensed; you may select * either version 2 of the GNU General Public License ("GPL") or BSD license * ("BSD"). * * You can contact the author at: * - xxHash homepage: https://cyan4973.github.io/xxHash/ * - xxHash source repository: https://github.com/Cyan4973/xxHash */ #include <linux/unaligned.h> #include <linux/errno.h> #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/xxhash.h> /*-************************************* * Macros **************************************/ #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r))) #define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r))) #ifdef __LITTLE_ENDIAN # define XXH_CPU_LITTLE_ENDIAN 1 #else # define XXH_CPU_LITTLE_ENDIAN 0 #endif /*-************************************* * Constants **************************************/ static const uint32_t PRIME32_1 = 2654435761U; static const uint32_t PRIME32_2 = 2246822519U; static const uint32_t PRIME32_3 = 3266489917U; static const uint32_t PRIME32_4 = 668265263U; static const uint32_t PRIME32_5 = 374761393U; static const uint64_t PRIME64_1 = 11400714785074694791ULL; static const uint64_t PRIME64_2 = 14029467366897019727ULL; static const uint64_t PRIME64_3 = 1609587929392839161ULL; static const uint64_t PRIME64_4 = 9650029242287828579ULL; static const uint64_t PRIME64_5 = 2870177450012600261ULL; /*-************************** * Utils ***************************/ void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh32_copy_state); void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) { memcpy(dst, src, sizeof(*dst)); } EXPORT_SYMBOL(xxh64_copy_state); /*-*************************** * Simple Hash Functions ****************************/ static uint32_t xxh32_round(uint32_t seed, const uint32_t input) { seed += input * PRIME32_2; seed = xxh_rotl32(seed, 13); seed *= PRIME32_1; return seed; } uint32_t xxh32(const void *input, const size_t len, const uint32_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *b_end = p + len; uint32_t h32; if (len >= 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = seed + PRIME32_1 + PRIME32_2; uint32_t v2 = seed + PRIME32_2; uint32_t v3 = seed + 0; uint32_t v4 = seed - PRIME32_1; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) + xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18); } else { h32 = seed + PRIME32_5; } h32 += (uint32_t)len; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32); static uint64_t xxh64_round(uint64_t acc, const uint64_t input) { acc += input * PRIME64_2; acc = xxh_rotl64(acc, 31); acc *= PRIME64_1; return acc; } static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val) { val = xxh64_round(0, val); acc ^= val; acc = acc * PRIME64_1 + PRIME64_4; return 
acc; } uint64_t xxh64(const void *input, const size_t len, const uint64_t seed) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; uint64_t h64; if (len >= 32) { const uint8_t *const limit = b_end - 32; uint64_t v1 = seed + PRIME64_1 + PRIME64_2; uint64_t v2 = seed + PRIME64_2; uint64_t v3 = seed + 0; uint64_t v4 = seed - PRIME64_1; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = seed + PRIME64_5; } h64 += (uint64_t)len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 = xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64); /*-************************************************** * Advanced Hash Functions ***************************************************/ void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh32_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; state.v4 = seed - PRIME32_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh32_reset); void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ struct xxh64_state state; memset(&state, 0, sizeof(state)); state.v1 = seed + PRIME64_1 + PRIME64_2; state.v2 = seed + PRIME64_2; state.v3 = seed + 0; state.v4 = seed - PRIME64_1; memcpy(statePtr, &state, sizeof(state)); } EXPORT_SYMBOL(xxh64_reset); int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len_32 += (uint32_t)len; state->large_len |= (len >= 16) | (state->total_len_32 >= 16); if (state->memsize + len < 16) { /* fill in tmp buffer */ memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* some data left from previous update */ const uint32_t *p32 = state->mem32; memcpy((uint8_t *)(state->mem32) + state->memsize, input, 16 - state->memsize); state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); p32++; state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); p32++; state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); p32++; state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); p32++; p += 16-state->memsize; state->memsize = 0; } if (p <= b_end - 16) { const uint8_t *const limit = b_end - 16; uint32_t v1 = state->v1; uint32_t v2 = state->v2; uint32_t v3 = state->v3; uint32_t v4 = state->v4; do { v1 = xxh32_round(v1, get_unaligned_le32(p)); p += 4; v2 = xxh32_round(v2, get_unaligned_le32(p)); p += 4; 
v3 = xxh32_round(v3, get_unaligned_le32(p)); p += 4; v4 = xxh32_round(v4, get_unaligned_le32(p)); p += 4; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem32, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end-p); } return 0; } EXPORT_SYMBOL(xxh32_update); uint32_t xxh32_digest(const struct xxh32_state *state) { const uint8_t *p = (const uint8_t *)state->mem32; const uint8_t *const b_end = (const uint8_t *)(state->mem32) + state->memsize; uint32_t h32; if (state->large_len) { h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); } else { h32 = state->v3 /* == seed */ + PRIME32_5; } h32 += state->total_len_32; while (p + 4 <= b_end) { h32 += get_unaligned_le32(p) * PRIME32_3; h32 = xxh_rotl32(h32, 17) * PRIME32_4; p += 4; } while (p < b_end) { h32 += (*p) * PRIME32_5; h32 = xxh_rotl32(h32, 11) * PRIME32_1; p++; } h32 ^= h32 >> 15; h32 *= PRIME32_2; h32 ^= h32 >> 13; h32 *= PRIME32_3; h32 ^= h32 >> 16; return h32; } EXPORT_SYMBOL(xxh32_digest); int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; const uint8_t *const b_end = p + len; if (input == NULL) return -EINVAL; state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ memcpy(((uint8_t *)state->mem64) + state->memsize, input, len); state->memsize += (uint32_t)len; return 0; } if (state->memsize) { /* tmp buffer is full */ uint64_t *p64 = state->mem64; memcpy(((uint8_t *)p64) + state->memsize, input, 32 - state->memsize); state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64)); p64++; state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64)); p64++; state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64)); p64++; state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64)); p += 32 - state->memsize; state->memsize = 0; } if (p + 32 <= b_end) { const uint8_t *const limit = b_end - 32; uint64_t v1 = state->v1; uint64_t v2 = state->v2; uint64_t v3 = state->v3; uint64_t v4 = state->v4; do { v1 = xxh64_round(v1, get_unaligned_le64(p)); p += 8; v2 = xxh64_round(v2, get_unaligned_le64(p)); p += 8; v3 = xxh64_round(v3, get_unaligned_le64(p)); p += 8; v4 = xxh64_round(v4, get_unaligned_le64(p)); p += 8; } while (p <= limit); state->v1 = v1; state->v2 = v2; state->v3 = v3; state->v4 = v4; } if (p < b_end) { memcpy(state->mem64, p, (size_t)(b_end-p)); state->memsize = (uint32_t)(b_end - p); } return 0; } EXPORT_SYMBOL(xxh64_update); uint64_t xxh64_digest(const struct xxh64_state *state) { const uint8_t *p = (const uint8_t *)state->mem64; const uint8_t *const b_end = (const uint8_t *)state->mem64 + state->memsize; uint64_t h64; if (state->total_len >= 32) { const uint64_t v1 = state->v1; const uint64_t v2 = state->v2; const uint64_t v3 = state->v3; const uint64_t v4 = state->v4; h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) + xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18); h64 = xxh64_merge_round(h64, v1); h64 = xxh64_merge_round(h64, v2); h64 = xxh64_merge_round(h64, v3); h64 = xxh64_merge_round(h64, v4); } else { h64 = state->v3 + PRIME64_5; } h64 += (uint64_t)state->total_len; while (p + 8 <= b_end) { const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p)); h64 ^= k1; h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4; p += 8; } if (p + 4 <= b_end) { h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1; h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p += 4; } while (p < b_end) { h64 ^= (*p) * PRIME64_5; h64 
= xxh_rotl64(h64, 11) * PRIME64_1; p++; } h64 ^= h64 >> 33; h64 *= PRIME64_2; h64 ^= h64 >> 29; h64 *= PRIME64_3; h64 ^= h64 >> 32; return h64; } EXPORT_SYMBOL(xxh64_digest); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("xxHash"); |
46 46 44 35 44 46 46 46 46 46 46 44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 | // SPDX-License-Identifier: LGPL-2.1-or-later /* * dvb_demux.c - DVB kernel demux API * * Copyright (C) 2000-2001 Ralph Metzler <ralph@convergence.de> * & Marcus Metzler <marcus@convergence.de> * for convergence integrated media GmbH */ #define pr_fmt(fmt) "dvb_demux: " fmt #include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/string.h> #include <linux/crc32.h> #include <linux/uaccess.h> #include <asm/div64.h> #include <media/dvb_demux.h> static int dvb_demux_tscheck; module_param(dvb_demux_tscheck, int, 0644); MODULE_PARM_DESC(dvb_demux_tscheck, "enable transport stream continuity and TEI check"); static int dvb_demux_speedcheck; module_param(dvb_demux_speedcheck, int, 0644); MODULE_PARM_DESC(dvb_demux_speedcheck, "enable transport stream speed check"); static int dvb_demux_feed_err_pkts = 1; module_param(dvb_demux_feed_err_pkts, int, 0644); MODULE_PARM_DESC(dvb_demux_feed_err_pkts, "when set to 0, drop packets with the TEI bit set (1 by default)"); #define dprintk(fmt, arg...) \ printk(KERN_DEBUG pr_fmt("%s: " fmt), __func__, ##arg) #define dprintk_tscheck(x...) do { \ if (dvb_demux_tscheck && printk_ratelimit()) \ dprintk(x); \ } while (0) #ifdef CONFIG_DVB_DEMUX_SECTION_LOSS_LOG # define dprintk_sect_loss(x...) dprintk(x) #else # define dprintk_sect_loss(x...) 
#endif #define set_buf_flags(__feed, __flag) \ do { \ (__feed)->buffer_flags |= (__flag); \ } while (0) /****************************************************************************** * static inlined helper functions ******************************************************************************/ static inline u16 section_length(const u8 *buf) { return 3 + ((buf[1] & 0x0f) << 8) + buf[2]; } static inline u16 ts_pid(const u8 *buf) { return ((buf[1] & 0x1f) << 8) + buf[2]; } static inline u8 payload(const u8 *tsp) { if (!(tsp[3] & 0x10)) // no payload? return 0; if (tsp[3] & 0x20) { // adaptation field? if (tsp[4] > 183) // corrupted data? return 0; else return 184 - 1 - tsp[4]; } return 184; } static u32 dvb_dmx_crc32(struct dvb_demux_feed *f, const u8 *src, size_t len) { return (f->feed.sec.crc_val = crc32_be(f->feed.sec.crc_val, src, len)); } static void dvb_dmx_memcopy(struct dvb_demux_feed *f, u8 *d, const u8 *s, size_t len) { memcpy(d, s, len); } /****************************************************************************** * Software filter functions ******************************************************************************/ static inline int dvb_dmx_swfilter_payload(struct dvb_demux_feed *feed, const u8 *buf) { int count = payload(buf); int p; int ccok; u8 cc; if (count == 0) return -1; p = 188 - count; cc = buf[3] & 0x0f; ccok = ((feed->cc + 1) & 0x0f) == cc; if (!ccok) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("missed packet: %d instead of %d!\n", cc, (feed->cc + 1) & 0x0f); } feed->cc = cc; if (buf[1] & 0x40) // PUSI ? feed->peslen = 0xfffa; feed->peslen += count; return feed->cb.ts(&buf[p], count, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } static int dvb_dmx_swfilter_sectionfilter(struct dvb_demux_feed *feed, struct dvb_demux_filter *f) { u8 neq = 0; int i; for (i = 0; i < DVB_DEMUX_MASK_MAX; i++) { u8 xor = f->filter.filter_value[i] ^ feed->feed.sec.secbuf[i]; if (f->maskandmode[i] & xor) return 0; neq |= f->maskandnotmode[i] & xor; } if (f->doneq && !neq) return 0; return feed->cb.sec(feed->feed.sec.secbuf, feed->feed.sec.seclen, NULL, 0, &f->filter, &feed->buffer_flags); } static inline int dvb_dmx_swfilter_section_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct dvb_demux_filter *f = feed->filter; struct dmx_section_feed *sec = &feed->feed.sec; int section_syntax_indicator; if (!sec->is_filtering) return 0; if (!f) return 0; if (sec->check_crc) { section_syntax_indicator = ((sec->secbuf[1] & 0x80) != 0); if (section_syntax_indicator && demux->check_crc32(feed, sec->secbuf, sec->seclen)) { set_buf_flags(feed, DMX_BUFFER_FLAG_HAD_CRC32_DISCARD); return -1; } } do { if (dvb_dmx_swfilter_sectionfilter(feed, f) < 0) return -1; } while ((f = f->next) && sec->is_filtering); sec->seclen = 0; return 0; } static void dvb_dmx_swfilter_section_new(struct dvb_demux_feed *feed) { struct dmx_section_feed *sec = &feed->feed.sec; if (sec->secbufp < sec->tsfeedp) { int n = sec->tsfeedp - sec->secbufp; /* * Section padding is done with 0xff bytes entirely. * Due to speed reasons, we won't check all of them * but just first and last. 
*/ if (sec->secbuf[0] != 0xff || sec->secbuf[n - 1] != 0xff) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("section ts padding loss: %d/%d\n", n, sec->tsfeedp); dprintk_sect_loss("pad data: %*ph\n", n, sec->secbuf); } } sec->tsfeedp = sec->secbufp = sec->seclen = 0; sec->secbuf = sec->secbuf_base; } /* * Losless Section Demux 1.4.1 by Emard * Valsecchi Patrick: * - middle of section A (no PUSI) * - end of section A and start of section B * (with PUSI pointing to the start of the second section) * * In this case, without feed->pusi_seen you'll receive a garbage section * consisting of the end of section A. Basically because tsfeedp * is incemented and the use=0 condition is not raised * when the second packet arrives. * * Fix: * when demux is started, let feed->pusi_seen = false to * prevent initial feeding of garbage from the end of * previous section. When you for the first time see PUSI=1 * then set feed->pusi_seen = true */ static int dvb_dmx_swfilter_section_copy_dump(struct dvb_demux_feed *feed, const u8 *buf, u8 len) { struct dvb_demux *demux = feed->demux; struct dmx_section_feed *sec = &feed->feed.sec; u16 limit, seclen; if (sec->tsfeedp >= DMX_MAX_SECFEED_SIZE) return 0; if (sec->tsfeedp + len > DMX_MAX_SECFEED_SIZE) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("section buffer full loss: %d/%d\n", sec->tsfeedp + len - DMX_MAX_SECFEED_SIZE, DMX_MAX_SECFEED_SIZE); len = DMX_MAX_SECFEED_SIZE - sec->tsfeedp; } if (len <= 0) return 0; demux->memcopy(feed, sec->secbuf_base + sec->tsfeedp, buf, len); sec->tsfeedp += len; /* * Dump all the sections we can find in the data (Emard) */ limit = sec->tsfeedp; if (limit > DMX_MAX_SECFEED_SIZE) return -1; /* internal error should never happen */ /* to be sure always set secbuf */ sec->secbuf = sec->secbuf_base + sec->secbufp; while (sec->secbufp + 2 < limit) { seclen = section_length(sec->secbuf); if (seclen <= 0 || seclen > DMX_MAX_SECTION_SIZE || seclen + sec->secbufp > limit) return 0; sec->seclen = seclen; sec->crc_val = ~0; /* dump [secbuf .. secbuf+seclen) */ if (feed->pusi_seen) { dvb_dmx_swfilter_section_feed(feed); } else { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("pusi not seen, discarding section data\n"); } sec->secbufp += seclen; /* secbufp and secbuf moving together is */ sec->secbuf += seclen; /* redundant but saves pointer arithmetic */ } return 0; } static int dvb_dmx_swfilter_section_packet(struct dvb_demux_feed *feed, const u8 *buf) { u8 p, count; int ccok, dc_i = 0; u8 cc; count = payload(buf); if (count == 0) /* count == 0 if no payload or out of range */ return -1; p = 188 - count; /* payload start */ cc = buf[3] & 0x0f; ccok = ((feed->cc + 1) & 0x0f) == cc; if (buf[3] & 0x20) { /* adaption field present, check for discontinuity_indicator */ if ((buf[4] > 0) && (buf[5] & 0x80)) dc_i = 1; } if (!ccok || dc_i) { if (dc_i) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_INDICATOR); dprintk_sect_loss("%d frame with disconnect indicator\n", cc); } else { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("discontinuity: %d instead of %d. %d bytes lost\n", cc, (feed->cc + 1) & 0x0f, count + 4); } /* * those bytes under some circumstances will again be reported * in the following dvb_dmx_swfilter_section_new */ /* * Discontinuity detected. 
Reset pusi_seen to * stop feeding of suspicious data until next PUSI=1 arrives * * FIXME: does it make sense if the MPEG-TS is the one * reporting discontinuity? */ feed->pusi_seen = false; dvb_dmx_swfilter_section_new(feed); } feed->cc = cc; if (buf[1] & 0x40) { /* PUSI=1 (is set), section boundary is here */ if (count > 1 && buf[p] < count) { const u8 *before = &buf[p + 1]; u8 before_len = buf[p]; const u8 *after = &before[before_len]; u8 after_len = count - 1 - before_len; dvb_dmx_swfilter_section_copy_dump(feed, before, before_len); /* before start of new section, set pusi_seen */ feed->pusi_seen = true; dvb_dmx_swfilter_section_new(feed); dvb_dmx_swfilter_section_copy_dump(feed, after, after_len); } else if (count > 0) { set_buf_flags(feed, DMX_BUFFER_FLAG_DISCONTINUITY_DETECTED); dprintk_sect_loss("PUSI=1 but %d bytes lost\n", count); } } else { /* PUSI=0 (is not set), no section boundary */ dvb_dmx_swfilter_section_copy_dump(feed, &buf[p], count); } return 0; } static inline void dvb_dmx_swfilter_packet_type(struct dvb_demux_feed *feed, const u8 *buf) { switch (feed->type) { case DMX_TYPE_TS: if (!feed->feed.ts.is_filtering) break; if (feed->ts_type & TS_PACKET) { if (feed->ts_type & TS_PAYLOAD_ONLY) dvb_dmx_swfilter_payload(feed, buf); else feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } /* Used only on full-featured devices */ if (feed->ts_type & TS_DECODER) if (feed->demux->write_to_decoder) feed->demux->write_to_decoder(feed, buf, 188); break; case DMX_TYPE_SEC: if (!feed->feed.sec.is_filtering) break; if (dvb_dmx_swfilter_section_packet(feed, buf) < 0) feed->feed.sec.seclen = feed->feed.sec.secbufp = 0; break; default: break; } } #define DVR_FEED(f) \ (((f)->type == DMX_TYPE_TS) && \ ((f)->feed.ts.is_filtering) && \ (((f)->ts_type & (TS_PACKET | TS_DEMUX)) == TS_PACKET)) static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) { struct dvb_demux_feed *feed; u16 pid = ts_pid(buf); int dvr_done = 0; if (dvb_demux_speedcheck) { ktime_t cur_time; u64 speed_bytes, speed_timedelta; demux->speed_pkts_cnt++; /* show speed every SPEED_PKTS_INTERVAL packets */ if (!(demux->speed_pkts_cnt % SPEED_PKTS_INTERVAL)) { cur_time = ktime_get(); if (ktime_to_ns(demux->speed_last_time) != 0) { speed_bytes = (u64)demux->speed_pkts_cnt * 188 * 8; /* convert to 1024 basis */ speed_bytes = 1000 * div64_u64(speed_bytes, 1024); speed_timedelta = ktime_ms_delta(cur_time, demux->speed_last_time); if (speed_timedelta) dprintk("TS speed %llu Kbits/sec \n", div64_u64(speed_bytes, speed_timedelta)); } demux->speed_last_time = cur_time; demux->speed_pkts_cnt = 0; } } if (buf[1] & 0x80) { list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; set_buf_flags(feed, DMX_BUFFER_FLAG_TEI); } dprintk_tscheck("TEI detected. PID=0x%x data1=0x%x\n", pid, buf[1]); /* data in this packet can't be trusted - drop it unless * module option dvb_demux_feed_err_pkts is set */ if (!dvb_demux_feed_err_pkts) return; } else /* if TEI bit is set, pid may be wrong- skip pkt counter */ if (demux->cnt_storage && dvb_demux_tscheck) { /* check pkt counter */ if (pid < MAX_PID) { if (buf[3] & 0x10) demux->cnt_storage[pid] = (demux->cnt_storage[pid] + 1) & 0xf; if ((buf[3] & 0xf) != demux->cnt_storage[pid]) { list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; set_buf_flags(feed, DMX_BUFFER_PKT_COUNTER_MISMATCH); } dprintk_tscheck("TS packet counter mismatch. 
PID=0x%x expected 0x%x got 0x%x\n", pid, demux->cnt_storage[pid], buf[3] & 0xf); demux->cnt_storage[pid] = buf[3] & 0xf; } } /* end check */ } list_for_each_entry(feed, &demux->feed_list, list_head) { if ((feed->pid != pid) && (feed->pid != 0x2000)) continue; /* copy each packet only once to the dvr device, even * if a PID is in multiple filters (e.g. video + PCR) */ if ((DVR_FEED(feed)) && (dvr_done++)) continue; if (feed->pid == pid) dvb_dmx_swfilter_packet_type(feed, buf); else if (feed->pid == 0x2000) feed->cb.ts(buf, 188, NULL, 0, &feed->feed.ts, &feed->buffer_flags); } } void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, size_t count) { unsigned long flags; spin_lock_irqsave(&demux->lock, flags); while (count--) { if (buf[0] == 0x47) dvb_dmx_swfilter_packet(demux, buf); buf += 188; } spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_packets); static inline int find_next_packet(const u8 *buf, int pos, size_t count, const int pktsize) { int start = pos, lost; while (pos < count) { if (buf[pos] == 0x47 || (pktsize == 204 && buf[pos] == 0xB8)) break; pos++; } lost = pos - start; if (lost) { /* This garbage is part of a valid packet? */ int backtrack = pos - pktsize; if (backtrack >= 0 && (buf[backtrack] == 0x47 || (pktsize == 204 && buf[backtrack] == 0xB8))) return backtrack; } return pos; } /* Filter all pktsize= 188 or 204 sized packets and skip garbage. */ static inline void _dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count, const int pktsize) { int p = 0, i, j; const u8 *q; unsigned long flags; spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { /* tsbuf[0] is now 0x47. */ i = demux->tsbufp; j = pktsize - i; if (count < j) { memcpy(&demux->tsbuf[i], buf, count); demux->tsbufp += count; goto bailout; } memcpy(&demux->tsbuf[i], buf, j); if (demux->tsbuf[0] == 0x47) /* double check */ dvb_dmx_swfilter_packet(demux, demux->tsbuf); demux->tsbufp = 0; p += j; } while (1) { p = find_next_packet(buf, p, count, pktsize); if (p >= count) break; if (count - p < pktsize) break; q = &buf[p]; if (pktsize == 204 && (*q == 0xB8)) { memcpy(demux->tsbuf, q, 188); demux->tsbuf[0] = 0x47; q = demux->tsbuf; } dvb_dmx_swfilter_packet(demux, q); p += pktsize; } i = count - p; if (i) { memcpy(demux->tsbuf, &buf[p], i); demux->tsbufp = i; if (pktsize == 204 && demux->tsbuf[0] == 0xB8) demux->tsbuf[0] = 0x47; } bailout: spin_unlock_irqrestore(&demux->lock, flags); } void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) { _dvb_dmx_swfilter(demux, buf, count, 188); } EXPORT_SYMBOL(dvb_dmx_swfilter); void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) { _dvb_dmx_swfilter(demux, buf, count, 204); } EXPORT_SYMBOL(dvb_dmx_swfilter_204); void dvb_dmx_swfilter_raw(struct dvb_demux *demux, const u8 *buf, size_t count) { unsigned long flags; spin_lock_irqsave(&demux->lock, flags); demux->feed->cb.ts(buf, count, NULL, 0, &demux->feed->feed.ts, &demux->feed->buffer_flags); spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_raw); static struct dvb_demux_filter *dvb_dmx_filter_alloc(struct dvb_demux *demux) { int i; for (i = 0; i < demux->filternum; i++) if (demux->filter[i].state == DMX_STATE_FREE) break; if (i == demux->filternum) return NULL; demux->filter[i].state = DMX_STATE_ALLOCATED; return &demux->filter[i]; } static struct dvb_demux_feed *dvb_dmx_feed_alloc(struct dvb_demux *demux) { int i; for (i = 0; i < demux->feednum; i++) if (demux->feed[i].state == 
DMX_STATE_FREE) break; if (i == demux->feednum) return NULL; demux->feed[i].state = DMX_STATE_ALLOCATED; return &demux->feed[i]; } static int dvb_demux_feed_find(struct dvb_demux_feed *feed) { struct dvb_demux_feed *entry; list_for_each_entry(entry, &feed->demux->feed_list, list_head) if (entry == feed) return 1; return 0; } static void dvb_demux_feed_add(struct dvb_demux_feed *feed) { spin_lock_irq(&feed->demux->lock); if (dvb_demux_feed_find(feed)) { pr_err("%s: feed already in list (type=%x state=%x pid=%x)\n", __func__, feed->type, feed->state, feed->pid); goto out; } list_add(&feed->list_head, &feed->demux->feed_list); out: spin_unlock_irq(&feed->demux->lock); } static void dvb_demux_feed_del(struct dvb_demux_feed *feed) { spin_lock_irq(&feed->demux->lock); if (!(dvb_demux_feed_find(feed))) { pr_err("%s: feed not in list (type=%x state=%x pid=%x)\n", __func__, feed->type, feed->state, feed->pid); goto out; } list_del(&feed->list_head); out: spin_unlock_irq(&feed->demux->lock); } static int dmx_ts_feed_set(struct dmx_ts_feed *ts_feed, u16 pid, int ts_type, enum dmx_ts_pes pes_type, ktime_t timeout) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; if (pid > DMX_MAX_PID) return -EINVAL; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (ts_type & TS_DECODER) { if (pes_type >= DMX_PES_OTHER) { mutex_unlock(&demux->mutex); return -EINVAL; } if (demux->pesfilter[pes_type] && demux->pesfilter[pes_type] != feed) { mutex_unlock(&demux->mutex); return -EINVAL; } demux->pesfilter[pes_type] = feed; demux->pids[pes_type] = pid; } dvb_demux_feed_add(feed); feed->pid = pid; feed->timeout = timeout; feed->ts_type = ts_type; feed->pes_type = pes_type; feed->state = DMX_STATE_READY; mutex_unlock(&demux->mutex); return 0; } static int dmx_ts_feed_start_filtering(struct dmx_ts_feed *ts_feed) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; int ret; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (feed->state != DMX_STATE_READY || feed->type != DMX_TYPE_TS) { mutex_unlock(&demux->mutex); return -EINVAL; } if (!demux->start_feed) { mutex_unlock(&demux->mutex); return -ENODEV; } if ((ret = demux->start_feed(feed)) < 0) { mutex_unlock(&demux->mutex); return ret; } spin_lock_irq(&demux->lock); ts_feed->is_filtering = 1; feed->state = DMX_STATE_GO; spin_unlock_irq(&demux->lock); mutex_unlock(&demux->mutex); return 0; } static int dmx_ts_feed_stop_filtering(struct dmx_ts_feed *ts_feed) { struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; struct dvb_demux *demux = feed->demux; int ret; mutex_lock(&demux->mutex); if (feed->state < DMX_STATE_GO) { mutex_unlock(&demux->mutex); return -EINVAL; } if (!demux->stop_feed) { mutex_unlock(&demux->mutex); return -ENODEV; } ret = demux->stop_feed(feed); spin_lock_irq(&demux->lock); ts_feed->is_filtering = 0; feed->state = DMX_STATE_ALLOCATED; spin_unlock_irq(&demux->lock); mutex_unlock(&demux->mutex); return ret; } static int dvbdmx_allocate_ts_feed(struct dmx_demux *dmx, struct dmx_ts_feed **ts_feed, dmx_ts_cb callback) { struct dvb_demux *demux = (struct dvb_demux *)dmx; struct dvb_demux_feed *feed; if (mutex_lock_interruptible(&demux->mutex)) return -ERESTARTSYS; if (!(feed = dvb_dmx_feed_alloc(demux))) { mutex_unlock(&demux->mutex); return -EBUSY; } feed->type = DMX_TYPE_TS; feed->cb.ts = callback; feed->demux = demux; feed->pid = 0xffff; feed->peslen = 0xfffa; feed->buffer_flags = 0; (*ts_feed) = 
&feed->feed.ts; (*ts_feed)->parent = dmx; (*ts_feed)->priv = NULL; (*ts_feed)->is_filtering = 0; (*ts_feed)->start_filtering = dmx_ts_feed_start_filtering; (*ts_feed)->stop_filtering = dmx_ts_feed_stop_filtering; (*ts_feed)->set = dmx_ts_feed_set; if (!(feed->filter = dvb_dmx_filter_alloc(demux))) { feed->state = DMX_STATE_FREE; mutex_unlock(&demux->mutex); return -EBUSY; } feed->filter->type = DMX_TYPE_TS; feed->filter->feed = feed; feed->filter->state = DMX_STATE_READY; mutex_unlock(&demux->mutex); return 0; } static int dvbdmx_release_ts_feed(struct dmx_demux *dmx, struct dmx_ts_feed *ts_feed) { struct dvb_demux *demux = (struct dvb_demux *)dmx; struct dvb_demux_feed *feed = (struct dvb_demux_feed *)ts_feed; mutex_lock(&demux->mutex); if (feed->state == DMX_STATE_FREE) { mutex_unlock(&demux->mutex); return -EINVAL; } feed->state = DMX_STATE_FREE; feed->filter->state = DMX_STATE_FREE; dvb_demux_feed_del(feed); feed->pid = 0xffff; if (feed->ts_type & TS_DECODER && feed->pes_type < DMX_PES_OTHER) demux->pesfilter[feed->pes_type] = NULL; mutex_unlock(&demux->mutex); return 0; } /****************************************************************************** * dmx_section_feed API calls ******************************************************************************/ static int dmx_section_feed_allocate_filter(struct dmx_section_feed *feed, struct dmx_section_filter **filter) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdemux = dvbdmxfeed->demux; struct dvb_demux_filter *dvbdmxfilter; if (mutex_lock_interruptible(&dvbdemux->mutex)) return -ERESTARTSYS; dvbdmxfilter = dvb_dmx_filter_alloc(dvbdemux); if (!dvbdmxfilter) { mutex_unlock(&dvbdemux->mutex); return -EBUSY; } spin_lock_irq(&dvbdemux->lock); *filter = &dvbdmxfilter->filter; (*filter)->parent = feed; (*filter)->priv = NULL; dvbdmxfilter->feed = dvbdmxfeed; dvbdmxfilter->type = DMX_TYPE_SEC; dvbdmxfilter->state = DMX_STATE_READY; dvbdmxfilter->next = dvbdmxfeed->filter; dvbdmxfeed->filter = dvbdmxfilter; spin_unlock_irq(&dvbdemux->lock); mutex_unlock(&dvbdemux->mutex); return 0; } static int dmx_section_feed_set(struct dmx_section_feed *feed, u16 pid, int check_crc) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; if (pid > 0x1fff) return -EINVAL; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; dvb_demux_feed_add(dvbdmxfeed); dvbdmxfeed->pid = pid; dvbdmxfeed->feed.sec.check_crc = check_crc; dvbdmxfeed->state = DMX_STATE_READY; mutex_unlock(&dvbdmx->mutex); return 0; } static void prepare_secfilters(struct dvb_demux_feed *dvbdmxfeed) { int i; struct dvb_demux_filter *f; struct dmx_section_filter *sf; u8 mask, mode, doneq; if (!(f = dvbdmxfeed->filter)) return; do { sf = &f->filter; doneq = false; for (i = 0; i < DVB_DEMUX_MASK_MAX; i++) { mode = sf->filter_mode[i]; mask = sf->filter_mask[i]; f->maskandmode[i] = mask & mode; doneq |= f->maskandnotmode[i] = mask & ~mode; } f->doneq = doneq ? 
true : false; } while ((f = f->next)); } static int dmx_section_feed_start_filtering(struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; int ret; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; if (feed->is_filtering) { mutex_unlock(&dvbdmx->mutex); return -EBUSY; } if (!dvbdmxfeed->filter) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } dvbdmxfeed->feed.sec.tsfeedp = 0; dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base; dvbdmxfeed->feed.sec.secbufp = 0; dvbdmxfeed->feed.sec.seclen = 0; dvbdmxfeed->pusi_seen = false; if (!dvbdmx->start_feed) { mutex_unlock(&dvbdmx->mutex); return -ENODEV; } prepare_secfilters(dvbdmxfeed); if ((ret = dvbdmx->start_feed(dvbdmxfeed)) < 0) { mutex_unlock(&dvbdmx->mutex); return ret; } spin_lock_irq(&dvbdmx->lock); feed->is_filtering = 1; dvbdmxfeed->state = DMX_STATE_GO; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return 0; } static int dmx_section_feed_stop_filtering(struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; int ret; mutex_lock(&dvbdmx->mutex); if (!dvbdmx->stop_feed) { mutex_unlock(&dvbdmx->mutex); return -ENODEV; } ret = dvbdmx->stop_feed(dvbdmxfeed); spin_lock_irq(&dvbdmx->lock); dvbdmxfeed->state = DMX_STATE_READY; feed->is_filtering = 0; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return ret; } static int dmx_section_feed_release_filter(struct dmx_section_feed *feed, struct dmx_section_filter *filter) { struct dvb_demux_filter *dvbdmxfilter = (struct dvb_demux_filter *)filter, *f; struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = dvbdmxfeed->demux; mutex_lock(&dvbdmx->mutex); if (dvbdmxfilter->feed != dvbdmxfeed) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } if (feed->is_filtering) { /* release dvbdmx->mutex as far as it is acquired by stop_filtering() itself */ mutex_unlock(&dvbdmx->mutex); feed->stop_filtering(feed); mutex_lock(&dvbdmx->mutex); } spin_lock_irq(&dvbdmx->lock); f = dvbdmxfeed->filter; if (f == dvbdmxfilter) { dvbdmxfeed->filter = dvbdmxfilter->next; } else { while (f->next != dvbdmxfilter) f = f->next; f->next = f->next->next; } dvbdmxfilter->state = DMX_STATE_FREE; spin_unlock_irq(&dvbdmx->lock); mutex_unlock(&dvbdmx->mutex); return 0; } static int dvbdmx_allocate_section_feed(struct dmx_demux *demux, struct dmx_section_feed **feed, dmx_section_cb callback) { struct dvb_demux *dvbdmx = (struct dvb_demux *)demux; struct dvb_demux_feed *dvbdmxfeed; if (mutex_lock_interruptible(&dvbdmx->mutex)) return -ERESTARTSYS; if (!(dvbdmxfeed = dvb_dmx_feed_alloc(dvbdmx))) { mutex_unlock(&dvbdmx->mutex); return -EBUSY; } dvbdmxfeed->type = DMX_TYPE_SEC; dvbdmxfeed->cb.sec = callback; dvbdmxfeed->demux = dvbdmx; dvbdmxfeed->pid = 0xffff; dvbdmxfeed->buffer_flags = 0; dvbdmxfeed->feed.sec.secbuf = dvbdmxfeed->feed.sec.secbuf_base; dvbdmxfeed->feed.sec.secbufp = dvbdmxfeed->feed.sec.seclen = 0; dvbdmxfeed->feed.sec.tsfeedp = 0; dvbdmxfeed->filter = NULL; (*feed) = &dvbdmxfeed->feed.sec; (*feed)->is_filtering = 0; (*feed)->parent = demux; (*feed)->priv = NULL; (*feed)->set = dmx_section_feed_set; (*feed)->allocate_filter = dmx_section_feed_allocate_filter; (*feed)->start_filtering = dmx_section_feed_start_filtering; (*feed)->stop_filtering = dmx_section_feed_stop_filtering; (*feed)->release_filter = dmx_section_feed_release_filter; 
mutex_unlock(&dvbdmx->mutex); return 0; } static int dvbdmx_release_section_feed(struct dmx_demux *demux, struct dmx_section_feed *feed) { struct dvb_demux_feed *dvbdmxfeed = (struct dvb_demux_feed *)feed; struct dvb_demux *dvbdmx = (struct dvb_demux *)demux; mutex_lock(&dvbdmx->mutex); if (dvbdmxfeed->state == DMX_STATE_FREE) { mutex_unlock(&dvbdmx->mutex); return -EINVAL; } dvbdmxfeed->state = DMX_STATE_FREE; dvb_demux_feed_del(dvbdmxfeed); dvbdmxfeed->pid = 0xffff; mutex_unlock(&dvbdmx->mutex); return 0; } /****************************************************************************** * dvb_demux kernel data API calls ******************************************************************************/ static int dvbdmx_open(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (dvbdemux->users >= MAX_DVB_DEMUX_USERS) return -EUSERS; dvbdemux->users++; return 0; } static int dvbdmx_close(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (dvbdemux->users == 0) return -ENODEV; dvbdemux->users--; //FIXME: release any unneeded resources if users==0 return 0; } static int dvbdmx_write(struct dmx_demux *demux, const char __user *buf, size_t count) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; void *p; if ((!demux->frontend) || (demux->frontend->source != DMX_MEMORY_FE)) return -EINVAL; p = memdup_user(buf, count); if (IS_ERR(p)) return PTR_ERR(p); if (mutex_lock_interruptible(&dvbdemux->mutex)) { kfree(p); return -ERESTARTSYS; } dvb_dmx_swfilter(dvbdemux, p, count); kfree(p); mutex_unlock(&dvbdemux->mutex); if (signal_pending(current)) return -EINTR; return count; } static int dvbdmx_add_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; struct list_head *head = &dvbdemux->frontend_list; list_add(&(frontend->connectivity_list), head); return 0; } static int dvbdmx_remove_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; struct list_head *pos, *n, *head = &dvbdemux->frontend_list; list_for_each_safe(pos, n, head) { if (DMX_FE_ENTRY(pos) == frontend) { list_del(pos); return 0; } } return -ENODEV; } static struct list_head *dvbdmx_get_frontends(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (list_empty(&dvbdemux->frontend_list)) return NULL; return &dvbdemux->frontend_list; } static int dvbdmx_connect_frontend(struct dmx_demux *demux, struct dmx_frontend *frontend) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; if (demux->frontend) return -EINVAL; mutex_lock(&dvbdemux->mutex); demux->frontend = frontend; mutex_unlock(&dvbdemux->mutex); return 0; } static int dvbdmx_disconnect_frontend(struct dmx_demux *demux) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; mutex_lock(&dvbdemux->mutex); demux->frontend = NULL; mutex_unlock(&dvbdemux->mutex); return 0; } static int dvbdmx_get_pes_pids(struct dmx_demux *demux, u16 * pids) { struct dvb_demux *dvbdemux = (struct dvb_demux *)demux; memcpy(pids, dvbdemux->pids, 5 * sizeof(u16)); return 0; } int dvb_dmx_init(struct dvb_demux *dvbdemux) { int i; struct dmx_demux *dmx = &dvbdemux->dmx; dvbdemux->cnt_storage = NULL; dvbdemux->users = 0; dvbdemux->filter = vmalloc(array_size(sizeof(struct dvb_demux_filter), dvbdemux->filternum)); if (!dvbdemux->filter) return -ENOMEM; dvbdemux->feed = vmalloc(array_size(sizeof(struct dvb_demux_feed), dvbdemux->feednum)); if (!dvbdemux->feed) { 
vfree(dvbdemux->filter); dvbdemux->filter = NULL; return -ENOMEM; } for (i = 0; i < dvbdemux->filternum; i++) { dvbdemux->filter[i].state = DMX_STATE_FREE; dvbdemux->filter[i].index = i; } for (i = 0; i < dvbdemux->feednum; i++) { dvbdemux->feed[i].state = DMX_STATE_FREE; dvbdemux->feed[i].index = i; } dvbdemux->cnt_storage = vmalloc(MAX_PID + 1); if (!dvbdemux->cnt_storage) pr_warn("Couldn't allocate memory for TS/TEI check. Disabling it\n"); INIT_LIST_HEAD(&dvbdemux->frontend_list); for (i = 0; i < DMX_PES_OTHER; i++) { dvbdemux->pesfilter[i] = NULL; dvbdemux->pids[i] = 0xffff; } INIT_LIST_HEAD(&dvbdemux->feed_list); dvbdemux->playing = 0; dvbdemux->recording = 0; dvbdemux->tsbufp = 0; if (!dvbdemux->check_crc32) dvbdemux->check_crc32 = dvb_dmx_crc32; if (!dvbdemux->memcopy) dvbdemux->memcopy = dvb_dmx_memcopy; dmx->frontend = NULL; dmx->priv = dvbdemux; dmx->open = dvbdmx_open; dmx->close = dvbdmx_close; dmx->write = dvbdmx_write; dmx->allocate_ts_feed = dvbdmx_allocate_ts_feed; dmx->release_ts_feed = dvbdmx_release_ts_feed; dmx->allocate_section_feed = dvbdmx_allocate_section_feed; dmx->release_section_feed = dvbdmx_release_section_feed; dmx->add_frontend = dvbdmx_add_frontend; dmx->remove_frontend = dvbdmx_remove_frontend; dmx->get_frontends = dvbdmx_get_frontends; dmx->connect_frontend = dvbdmx_connect_frontend; dmx->disconnect_frontend = dvbdmx_disconnect_frontend; dmx->get_pes_pids = dvbdmx_get_pes_pids; mutex_init(&dvbdemux->mutex); spin_lock_init(&dvbdemux->lock); return 0; } EXPORT_SYMBOL(dvb_dmx_init); void dvb_dmx_release(struct dvb_demux *dvbdemux) { vfree(dvbdemux->cnt_storage); vfree(dvbdemux->filter); vfree(dvbdemux->feed); } EXPORT_SYMBOL(dvb_dmx_release); |
7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 
1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright Gavin Shan, IBM Corporation 2016. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/of.h> #include <linux/platform_device.h> #include <net/ncsi.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <net/genetlink.h> #include "internal.h" #include "ncsi-pkt.h" #include "ncsi-netlink.h" LIST_HEAD(ncsi_dev_list); DEFINE_SPINLOCK(ncsi_dev_lock); bool ncsi_channel_has_link(struct ncsi_channel *channel) { return !!(channel->modes[NCSI_MODE_LINK].data[2] & 0x1); } bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp, struct ncsi_channel *channel) { struct ncsi_package *np; struct ncsi_channel *nc; NCSI_FOR_EACH_PACKAGE(ndp, np) NCSI_FOR_EACH_CHANNEL(np, nc) { if (nc == channel) continue; if (nc->state == NCSI_CHANNEL_ACTIVE && ncsi_channel_has_link(nc)) return false; } return true; } static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { nd->link_up = 0; goto report; } nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_ACTIVE) { spin_unlock_irqrestore(&nc->lock, flags); continue; } if (ncsi_channel_has_link(nc)) { spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } spin_unlock_irqrestore(&nc->lock, flags); } } report: nd->handler(nd); } static void ncsi_channel_monitor(struct timer_list *t) { struct ncsi_channel *nc = from_timer(nc, t, monitor.timer); struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; 
struct ncsi_channel_mode *ncm; struct ncsi_cmd_arg nca; bool enabled, chained; unsigned int monitor_state; unsigned long flags; int state, ret; spin_lock_irqsave(&nc->lock, flags); state = nc->state; chained = !list_empty(&nc->link); enabled = nc->monitor.enabled; monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); if (!enabled) return; /* expected race disabling timer */ if (WARN_ON_ONCE(chained)) goto bad_state; if (state != NCSI_CHANNEL_INACTIVE && state != NCSI_CHANNEL_ACTIVE) { bad_state: netdev_warn(ndp->ndev.dev, "Bad NCSI monitor state channel %d 0x%x %s queue\n", nc->id, state, chained ? "on" : "off"); spin_lock_irqsave(&nc->lock, flags); nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); return; } switch (monitor_state) { case NCSI_CHANNEL_MONITOR_START: case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); break; case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n", nc->id); ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); nc->monitor.enabled = false; nc->state = NCSI_CHANNEL_INVISIBLE; ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); nc->state = NCSI_CHANNEL_ACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); return; } spin_lock_irqsave(&nc->lock, flags); nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) { unsigned long flags; spin_lock_irqsave(&nc->lock, flags); WARN_ON_ONCE(nc->monitor.enabled); nc->monitor.enabled = true; nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) { unsigned long flags; spin_lock_irqsave(&nc->lock, flags); if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, unsigned char id) { struct ncsi_channel *nc; NCSI_FOR_EACH_CHANNEL(np, nc) { if (nc->id == id) return nc; } return NULL; } struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) { struct ncsi_channel *nc, *tmp; int index; unsigned long flags; nc = kzalloc(sizeof(*nc), GFP_ATOMIC); if (!nc) return NULL; nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; nc->monitor.enabled = false; timer_setup(&nc->monitor.timer, ncsi_channel_monitor, 0); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) nc->caps[index].index = index; for (index = 0; index < NCSI_MODE_MAX; index++) nc->modes[index].index = index; spin_lock_irqsave(&np->lock, flags); tmp = ncsi_find_channel(np, id); if (tmp) { spin_unlock_irqrestore(&np->lock, flags); kfree(nc); return tmp; } list_add_tail_rcu(&nc->node, &np->channels); np->channel_num++; spin_unlock_irqrestore(&np->lock, flags); return nc; } static void ncsi_remove_channel(struct ncsi_channel *nc) { struct ncsi_package 
*np = nc->package; unsigned long flags; spin_lock_irqsave(&nc->lock, flags); /* Release filters */ kfree(nc->mac_filter.addrs); kfree(nc->vlan_filter.vids); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); /* Remove and free channel */ spin_lock_irqsave(&np->lock, flags); list_del_rcu(&nc->node); np->channel_num--; spin_unlock_irqrestore(&np->lock, flags); kfree(nc); } struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp, unsigned char id) { struct ncsi_package *np; NCSI_FOR_EACH_PACKAGE(ndp, np) { if (np->id == id) return np; } return NULL; } struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp, unsigned char id) { struct ncsi_package *np, *tmp; unsigned long flags; np = kzalloc(sizeof(*np), GFP_ATOMIC); if (!np) return NULL; np->id = id; np->ndp = ndp; spin_lock_init(&np->lock); INIT_LIST_HEAD(&np->channels); np->channel_whitelist = UINT_MAX; spin_lock_irqsave(&ndp->lock, flags); tmp = ncsi_find_package(ndp, id); if (tmp) { spin_unlock_irqrestore(&ndp->lock, flags); kfree(np); return tmp; } list_add_tail_rcu(&np->node, &ndp->packages); ndp->package_num++; spin_unlock_irqrestore(&ndp->lock, flags); return np; } void ncsi_remove_package(struct ncsi_package *np) { struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_channel *nc, *tmp; unsigned long flags; /* Release all child channels */ list_for_each_entry_safe(nc, tmp, &np->channels, node) ncsi_remove_channel(nc); /* Remove and free package */ spin_lock_irqsave(&ndp->lock, flags); list_del_rcu(&np->node); ndp->package_num--; spin_unlock_irqrestore(&ndp->lock, flags); kfree(np); } void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc) { struct ncsi_package *p; struct ncsi_channel *c; p = ncsi_find_package(ndp, NCSI_PACKAGE_INDEX(id)); c = p ? ncsi_find_channel(p, NCSI_CHANNEL_INDEX(id)) : NULL; if (np) *np = p; if (nc) *nc = c; } /* For two consecutive NCSI commands, the packet IDs shouldn't * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. 
*/ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); unsigned long flags; /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->flags = req_flags; ndp->request_id = i + 1; goto found; } /* Fail back to check from the starting cursor */ for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->flags = req_flags; ndp->request_id = i + 1; goto found; } found: spin_unlock_irqrestore(&ndp->lock, flags); return nr; } void ncsi_free_request(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; struct sk_buff *cmd, *rsp; unsigned long flags; bool driven; if (nr->enabled) { nr->enabled = false; del_timer_sync(&nr->timer); } spin_lock_irqsave(&ndp->lock, flags); cmd = nr->cmd; rsp = nr->rsp; nr->cmd = NULL; nr->rsp = NULL; nr->used = false; driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) schedule_work(&ndp->work); /* Release command and response */ consume_skb(cmd); consume_skb(rsp); } struct ncsi_dev *ncsi_find_dev(struct net_device *dev) { struct ncsi_dev_priv *ndp; NCSI_FOR_EACH_DEV(ndp) { if (ndp->ndev.dev == dev) return &ndp->ndev; } return NULL; } static void ncsi_request_timeout(struct timer_list *t) { struct ncsi_request *nr = from_timer(nr, t, timer); struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_cmd_pkt *cmd; struct ncsi_package *np; struct ncsi_channel *nc; unsigned long flags; /* If the request already had associated response, * let the response handler to release it. */ spin_lock_irqsave(&ndp->lock, flags); nr->enabled = false; if (nr->rsp || !nr->cmd) { spin_unlock_irqrestore(&ndp->lock, flags); return; } spin_unlock_irqrestore(&ndp->lock, flags); if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) { if (nr->cmd) { /* Find the package */ cmd = (struct ncsi_cmd_pkt *) skb_network_header(nr->cmd); ncsi_find_package_and_channel(ndp, cmd->cmd.common.channel, &np, &nc); ncsi_send_netlink_timeout(nr, np, nc); } } /* Release the request */ ncsi_free_request(nr); } static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc, *tmp; struct ncsi_cmd_arg nca; unsigned long flags; int ret; np = ndp->active_package; nc = ndp->active_channel; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; fallthrough; case ncsi_dev_state_suspend_select: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else nca.bytes[0] = 1; /* To retrieve the last link states of channels in current * package when current active channel needs fail over to * another one. It means we will possibly select another * channel as next active one. The link states of channels * are most important factor of the selection. So we need * accurate link states. Unfortunately, the link states on * inactive channels can't be updated with LSC AEN in time. 
*/ if (ndp->flags & NCSI_DEV_RESHUFFLE) nd->state = ncsi_dev_state_suspend_gls; else nd->state = ncsi_dev_state_suspend_dcnt; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_gls: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; nca.channel = ndp->channel_probe_id; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; ndp->channel_probe_id++; if (ndp->channel_probe_id == ndp->channel_count) { ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_suspend_dcnt; } break; case ncsi_dev_state_suspend_dcnt: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DCNT; nca.package = np->id; nca.channel = nc->id; nd->state = ncsi_dev_state_suspend_dc; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_dc: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DC; nca.package = np->id; nca.channel = nc->id; nca.bytes[0] = 1; nd->state = ncsi_dev_state_suspend_deselect; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; NCSI_FOR_EACH_CHANNEL(np, tmp) { /* If there is another channel active on this package * do not deselect the package. */ if (tmp != nc && tmp->state == NCSI_CHANNEL_ACTIVE) { nd->state = ncsi_dev_state_suspend_done; break; } } break; case ncsi_dev_state_suspend_deselect: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DP; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_done: spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); if (ndp->flags & NCSI_DEV_RESET) ncsi_reset_dev(nd); else ncsi_process_next_channel(ndp); break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", nd->state); } return; error: nd->state = ncsi_dev_state_functional; } /* Check the VLAN filter bitmap for a set filter, and construct a * "Set VLAN Filter - Disable" packet if found. */ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { struct ncsi_channel_vlan_filter *ncf; unsigned long flags; void *bitmap; int index; u16 vid; ncf = &nc->vlan_filter; bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); index = find_first_bit(bitmap, ncf->n_vids); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); return -1; } vid = ncf->vids[index]; clear_bit(index, bitmap); ncf->vids[index] = 0; spin_unlock_irqrestore(&nc->lock, flags); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vid; /* HW filter index starts at 1 */ nca->bytes[6] = index + 1; nca->bytes[7] = 0x00; return 0; } /* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable" * packet. 
*/ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { struct ncsi_channel_vlan_filter *ncf; struct vlan_vid *vlan = NULL; unsigned long flags; int i, index; void *bitmap; u16 vid; if (list_empty(&ndp->vlan_vids)) return -1; ncf = &nc->vlan_filter; bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); rcu_read_lock(); list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { vid = vlan->vid; for (i = 0; i < ncf->n_vids; i++) if (ncf->vids[i] == vid) { vid = 0; break; } if (vid) break; } rcu_read_unlock(); if (!vid) { /* No VLAN ID is not set */ spin_unlock_irqrestore(&nc->lock, flags); return -1; } index = find_first_zero_bit(bitmap, ncf->n_vids); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", nc->id); spin_unlock_irqrestore(&nc->lock, flags); return -1; } ncf->vids[index] = vid; set_bit(index, bitmap); spin_unlock_irqrestore(&nc->lock, flags); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vid; /* HW filter index starts at 1 */ nca->bytes[6] = index + 1; nca->bytes[7] = 0x01; return 0; } static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; int ret = 0; nca->payload = NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN; memset(data, 0, NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); data[4] = NCSI_OEM_INTEL_CMD_KEEP_PHY; /* PHY Link up attribute */ data[6] = 0x1; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN]; int ret = 0; nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN; memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_BCM_ID); data[5] = NCSI_OEM_BCM_CMD_GMA; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca) { union { u8 data_u8[NCSI_OEM_MLX_CMD_GMA_LEN]; u32 data_u32[NCSI_OEM_MLX_CMD_GMA_LEN / sizeof(u32)]; } u; int ret = 0; nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN; memset(&u, 0, sizeof(u)); u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA; u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM; nca->data = u.data_u8; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca) { union { u8 data_u8[NCSI_OEM_MLX_CMD_SMAF_LEN]; u32 data_u32[NCSI_OEM_MLX_CMD_SMAF_LEN / sizeof(u32)]; } u; int ret = 0; memset(&u, 0, sizeof(u)); u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF; u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM; memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET], nca->ndp->ndev.dev->dev_addr, ETH_ALEN); u.data_u8[MLX_SMAF_MED_SUPPORT_OFFSET] = (MLX_MC_RBT_AVL | MLX_MC_RBT_SUPPORT); nca->payload = NCSI_OEM_MLX_CMD_SMAF_LEN; nca->data = u.data_u8; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during probe\n", nca->type); return ret; } static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca) { unsigned char 
data[NCSI_OEM_INTEL_CMD_GMA_LEN]; int ret = 0; nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN; memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); data[4] = NCSI_OEM_INTEL_CMD_GMA; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } /* OEM Command handlers initialization */ static struct ncsi_oem_gma_handler { unsigned int mfr_id; int (*handler)(struct ncsi_cmd_arg *nca); } ncsi_oem_gma_handlers[] = { { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }, { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }, { NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel } }; static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) { struct ncsi_oem_gma_handler *nch = NULL; int i; /* This function should only be called once, return if flag set */ if (nca->ndp->gma_flag == 1) return -1; /* Find gma handler for given manufacturer id */ for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) { if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) { if (ncsi_oem_gma_handlers[i].handler) nch = &ncsi_oem_gma_handlers[i]; break; } } if (!nch) { netdev_err(nca->ndp->ndev.dev, "NCSI: No GMA handler available for MFR-ID (0x%x)\n", mf_id); return -1; } /* Get Mac address from NCSI device */ return nch->handler(nca); } /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) { struct ncsi_channel_mode *ncm; struct ncsi_channel *channel; struct ncsi_package *np; /* Check if any other channel has Tx enabled; a channel may have already * been configured and removed from the channel queue. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (!ndp->multi_package && np != nc->package) continue; NCSI_FOR_EACH_CHANNEL(np, channel) { ncm = &channel->modes[NCSI_MODE_TX_ENABLE]; if (ncm->enable) return false; } } /* This channel is the preferred channel and has link */ list_for_each_entry_rcu(channel, &ndp->channel_queue, link) { np = channel->package; if (np->preferred_channel && ncsi_channel_has_link(np->preferred_channel)) { return np->preferred_channel == nc; } } /* This channel has link */ if (ncsi_channel_has_link(nc)) return true; list_for_each_entry_rcu(channel, &ndp->channel_queue, link) if (ncsi_channel_has_link(channel)) return false; /* No other channel has link; default to this one */ return true; } /* Change the active Tx channel in a multi-channel setup */ int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp, struct ncsi_package *package, struct ncsi_channel *disable, struct ncsi_channel *enable) { struct ncsi_cmd_arg nca; struct ncsi_channel *nc; struct ncsi_package *np; int ret = 0; if (!package->multi_channel && !ndp->multi_package) netdev_warn(ndp->ndev.dev, "NCSI: Trying to update Tx channel in single-channel mode\n"); nca.ndp = ndp; nca.req_flags = 0; /* Find current channel with Tx enabled */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (disable) break; if (!ndp->multi_package && np != package) continue; NCSI_FOR_EACH_CHANNEL(np, nc) if (nc->modes[NCSI_MODE_TX_ENABLE].enable) { disable = nc; break; } } /* Find a suitable channel for Tx */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (enable) break; if (!ndp->multi_package && np != package) continue; if (!(ndp->package_whitelist & (0x1 << np->id))) continue; if (np->preferred_channel && ncsi_channel_has_link(np->preferred_channel)) { enable = np->preferred_channel; break; } NCSI_FOR_EACH_CHANNEL(np, nc) { if 
(!(np->channel_whitelist & 0x1 << nc->id)) continue; if (nc->state != NCSI_CHANNEL_ACTIVE) continue; if (ncsi_channel_has_link(nc)) { enable = nc; break; } } } if (disable == enable) return -1; if (!enable) return -1; if (disable) { nca.channel = disable->id; nca.package = disable->package->id; nca.type = NCSI_PKT_CMD_DCNT; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending DCNT\n", ret); } netdev_info(ndp->ndev.dev, "NCSI: channel %u enables Tx\n", enable->id); nca.channel = enable->id; nca.package = enable->package->id; nca.type = NCSI_PKT_CMD_ECNT; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending ECNT\n", ret); return ret; } static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) { struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_channel *hot_nc = NULL; struct ncsi_dev *nd = &ndp->ndev; struct net_device *dev = nd->dev; struct ncsi_cmd_arg nca; unsigned char index; unsigned long flags; int ret; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: ndp->pending_req_num = 1; /* Select the specific package */ nca.type = NCSI_PKT_CMD_SP; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else nca.bytes[0] = 1; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD_SP\n"); goto error; } nd->state = ncsi_dev_state_config_cis; break; case ncsi_dev_state_config_cis: ndp->pending_req_num = 1; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = np->id; nca.channel = nc->id; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD_CIS\n"); goto error; } nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) ? 
ncsi_dev_state_config_oem_gma : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_apply_mac; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; if (nc->version.major >= 1 && nc->version.minor >= 2) { nca.type = NCSI_PKT_CMD_GMCMA; ret = ncsi_xmit_cmd(&nca); } else { nca.type = NCSI_PKT_CMD_OEM; ret = ncsi_gma_handler(&nca, nc->version.mf_id); } if (ret < 0) { nd->state = ncsi_dev_state_config_clear_vids; schedule_work(&ndp->work); } break; case ncsi_dev_state_config_apply_mac: rtnl_lock(); ret = dev_set_mac_address(dev, &ndp->pending_mac, NULL); rtnl_unlock(); if (ret < 0) netdev_warn(dev, "NCSI: 'Writing MAC address to device failed\n"); nd->state = ncsi_dev_state_config_clear_vids; fallthrough; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: case ncsi_dev_state_config_ev: case ncsi_dev_state_config_sma: case ncsi_dev_state_config_ebf: case ncsi_dev_state_config_dgmf: case ncsi_dev_state_config_ecnt: case ncsi_dev_state_config_ec: case ncsi_dev_state_config_ae: case ncsi_dev_state_config_gls: ndp->pending_req_num = 1; nca.package = np->id; nca.channel = nc->id; /* Clear any active filters on the channel before setting */ if (nd->state == ncsi_dev_state_config_clear_vids) { ret = clear_one_vid(ndp, nc, &nca); if (ret) { nd->state = ncsi_dev_state_config_svf; schedule_work(&ndp->work); break; } /* Repeat */ nd->state = ncsi_dev_state_config_clear_vids; /* Add known VLAN tags to the filter */ } else if (nd->state == ncsi_dev_state_config_svf) { ret = set_one_vid(ndp, nc, &nca); if (ret) { nd->state = ncsi_dev_state_config_ev; schedule_work(&ndp->work); break; } /* Repeat */ nd->state = ncsi_dev_state_config_svf; /* Enable/Disable the VLAN filter */ } else if (nd->state == ncsi_dev_state_config_ev) { if (list_empty(&ndp->vlan_vids)) { nca.type = NCSI_PKT_CMD_DV; } else { nca.type = NCSI_PKT_CMD_EV; nca.bytes[3] = NCSI_CAP_VLAN_NO; } nd->state = ncsi_dev_state_config_sma; } else if (nd->state == ncsi_dev_state_config_sma) { /* Use first entry in unicast filter table. Note that * the MAC filter table starts from entry 1 instead of * 0. 
*/ nca.type = NCSI_PKT_CMD_SMA; for (index = 0; index < 6; index++) nca.bytes[index] = dev->dev_addr[index]; nca.bytes[6] = 0x1; nca.bytes[7] = 0x1; nd->state = ncsi_dev_state_config_ebf; } else if (nd->state == ncsi_dev_state_config_ebf) { nca.type = NCSI_PKT_CMD_EBF; nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap; /* if multicast global filtering is supported then * disable it so that all multicast packet will be * forwarded to management controller */ if (nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC) nd->state = ncsi_dev_state_config_dgmf; else if (ncsi_channel_is_tx(ndp, nc)) nd->state = ncsi_dev_state_config_ecnt; else nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_dgmf) { nca.type = NCSI_PKT_CMD_DGMF; if (ncsi_channel_is_tx(ndp, nc)) nd->state = ncsi_dev_state_config_ecnt; else nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_ecnt) { if (np->preferred_channel && nc != np->preferred_channel) netdev_info(ndp->ndev.dev, "NCSI: Tx failed over to channel %u\n", nc->id); nca.type = NCSI_PKT_CMD_ECNT; nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_ec) { /* Enable AEN if it's supported */ nca.type = NCSI_PKT_CMD_EC; nd->state = ncsi_dev_state_config_ae; if (!(nc->caps[NCSI_CAP_AEN].cap & NCSI_CAP_AEN_MASK)) nd->state = ncsi_dev_state_config_gls; } else if (nd->state == ncsi_dev_state_config_ae) { nca.type = NCSI_PKT_CMD_AE; nca.bytes[0] = 0; nca.dwords[1] = nc->caps[NCSI_CAP_AEN].cap; nd->state = ncsi_dev_state_config_gls; } else if (nd->state == ncsi_dev_state_config_gls) { nca.type = NCSI_PKT_CMD_GLS; nd->state = ncsi_dev_state_config_done; } ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD %x\n", nca.type); goto error; } break; case ncsi_dev_state_config_done: netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n", nc->id); spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_ACTIVE; if (ndp->flags & NCSI_DEV_RESET) { /* A reset event happened during config, start it now */ nc->reconfigure_needed = false; spin_unlock_irqrestore(&nc->lock, flags); ncsi_reset_dev(nd); break; } if (nc->reconfigure_needed) { /* This channel's configuration has been updated * part-way during the config state - start the * channel configuration over */ nc->reconfigure_needed = false; nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(dev, "Dirty NCSI channel state reset\n"); ncsi_process_next_channel(ndp); break; } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { hot_nc = nc; } else { hot_nc = NULL; netdev_dbg(ndp->ndev.dev, "NCSI: channel %u link down after config\n", nc->id); } spin_unlock_irqrestore(&nc->lock, flags); /* Update the hot channel */ spin_lock_irqsave(&ndp->lock, flags); ndp->hot_channel = hot_nc; spin_unlock_irqrestore(&ndp->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); break; default: netdev_alert(dev, "Wrong NCSI state 0x%x in config\n", nd->state); } return; error: ncsi_report_link(ndp, true); } static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { struct ncsi_channel *nc, *found, *hot_nc; struct ncsi_channel_mode *ncm; unsigned long flags, cflags; struct ncsi_package *np; bool with_link; spin_lock_irqsave(&ndp->lock, flags); hot_nc = ndp->hot_channel; spin_unlock_irqrestore(&ndp->lock, flags); /* By default the 
search is done once an inactive channel with up * link is found, unless a preferred channel is set. * If multi_package or multi_channel are configured all channels in the * whitelist are added to the channel queue. */ found = NULL; with_link = false; NCSI_FOR_EACH_PACKAGE(ndp, np) { if (!(ndp->package_whitelist & (0x1 << np->id))) continue; NCSI_FOR_EACH_CHANNEL(np, nc) { if (!(np->channel_whitelist & (0x1 << nc->id))) continue; spin_lock_irqsave(&nc->lock, cflags); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_INACTIVE) { spin_unlock_irqrestore(&nc->lock, cflags); continue; } if (!found) found = nc; if (nc == hot_nc) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { found = nc; with_link = true; } /* If multi_channel is enabled configure all valid * channels whether or not they currently have link * so they will have AENs enabled. */ if (with_link || np->multi_channel) { spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(ndp->ndev.dev, "NCSI: Channel %u added to queue (link %s)\n", nc->id, ncm->data[2] & 0x1 ? "up" : "down"); } spin_unlock_irqrestore(&nc->lock, cflags); if (with_link && !np->multi_channel) break; } if (with_link && !ndp->multi_package) break; } if (list_empty(&ndp->channel_queue) && found) { netdev_info(ndp->ndev.dev, "NCSI: No channel with link found, configuring channel %u\n", found->id); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&found->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); } else if (!found) { netdev_warn(ndp->ndev.dev, "NCSI: No channel found to configure!\n"); ncsi_report_link(ndp, true); return -ENODEV; } return ncsi_process_next_channel(ndp); } static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) { struct ncsi_package *np; struct ncsi_channel *nc; unsigned int cap; bool has_channel = false; /* The hardware arbitration is disabled if any one channel * doesn't support explicitly. 
*/ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { has_channel = true; cap = nc->caps[NCSI_CAP_GENERIC].cap; if (!(cap & NCSI_CAP_GENERIC_HWA) || (cap & NCSI_CAP_GENERIC_HWA_MASK) != NCSI_CAP_GENERIC_HWA_SUPPORT) { ndp->flags &= ~NCSI_DEV_HWA; return false; } } } if (has_channel) { ndp->flags |= NCSI_DEV_HWA; return true; } ndp->flags &= ~NCSI_DEV_HWA; return false; } static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_cmd_arg nca; unsigned char index; int ret; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; fallthrough; case ncsi_dev_state_probe_deselect: ndp->pending_req_num = 8; /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; } nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: if (ndp->package_probe_id >= 8) { /* Last package probed, finishing */ ndp->flags |= NCSI_DEV_PROBED; break; } ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->package_probe_id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_channel; break; case ncsi_dev_state_probe_channel: ndp->active_package = ncsi_find_package(ndp, ndp->package_probe_id); if (!ndp->active_package) { /* No response */ nd->state = ncsi_dev_state_probe_dp; schedule_work(&ndp->work); break; } nd->state = ncsi_dev_state_probe_cis; if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) && ndp->mlx_multi_host) nd->state = ncsi_dev_state_probe_mlx_gma; schedule_work(&ndp->work); break; case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_gma_handler_mlx(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_mlx_smaf; break; case ncsi_dev_state_probe_mlx_smaf: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_smaf_mlx(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_keep_phy_intel(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_gvi; break; case ncsi_dev_state_probe_cis: case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: np = ndp->active_package; ndp->pending_req_num = 1; /* Clear initial state Retrieve version, capability or link status */ if (nd->state == ncsi_dev_state_probe_cis) nca.type = NCSI_PKT_CMD_CIS; else if (nd->state == ncsi_dev_state_probe_gvi) nca.type = NCSI_PKT_CMD_GVI; else if (nd->state == ncsi_dev_state_probe_gc) nca.type = NCSI_PKT_CMD_GC; else nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; nca.channel = ndp->channel_probe_id; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; if (nd->state == ncsi_dev_state_probe_cis) { nd->state = ncsi_dev_state_probe_gvi; if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0) nd->state = ncsi_dev_state_probe_keep_phy; } else if (nd->state == ncsi_dev_state_probe_gvi) { nd->state = ncsi_dev_state_probe_gc; } else if (nd->state == ncsi_dev_state_probe_gc) { nd->state = 
ncsi_dev_state_probe_gls; } else { nd->state = ncsi_dev_state_probe_cis; ndp->channel_probe_id++; } if (ndp->channel_probe_id == ndp->channel_count) { ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe_dp; } break; case ncsi_dev_state_probe_dp: ndp->pending_req_num = 1; /* Deselect the current package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->package_probe_id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; /* Probe next package after receiving response */ ndp->package_probe_id++; nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n", nd->state); } if (ndp->flags & NCSI_DEV_PROBED) { /* Check if all packages have HWA support */ ncsi_check_hwa(ndp); ncsi_choose_active_channel(ndp); } return; error: netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during probe\n", nca.type); ncsi_report_link(ndp, true); } static void ncsi_dev_work(struct work_struct *work) { struct ncsi_dev_priv *ndp = container_of(work, struct ncsi_dev_priv, work); struct ncsi_dev *nd = &ndp->ndev; switch (nd->state & ncsi_dev_state_major) { case ncsi_dev_state_probe: ncsi_probe_channel(ndp); break; case ncsi_dev_state_suspend: ncsi_suspend_channel(ndp); break; case ncsi_dev_state_config: ncsi_configure_channel(ndp); break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%x in workqueue\n", nd->state); } } int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) { struct ncsi_channel *nc; int old_state; unsigned long flags; spin_lock_irqsave(&ndp->lock, flags); nc = list_first_or_null_rcu(&ndp->channel_queue, struct ncsi_channel, link); if (!nc) { spin_unlock_irqrestore(&ndp->lock, flags); goto out; } list_del_init(&nc->link); spin_unlock_irqrestore(&ndp->lock, flags); spin_lock_irqsave(&nc->lock, flags); old_state = nc->state; nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); ndp->active_channel = nc; ndp->active_package = nc->package; switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; netdev_dbg(ndp->ndev.dev, "NCSI: configuring channel %u\n", nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; netdev_dbg(ndp->ndev.dev, "NCSI: suspending channel %u\n", nc->id); ncsi_suspend_channel(ndp); break; default: netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } return 0; out: ndp->active_channel = NULL; ndp->active_package = NULL; if (ndp->flags & NCSI_DEV_RESHUFFLE) { ndp->flags &= ~NCSI_DEV_RESHUFFLE; return ncsi_choose_active_channel(ndp); } ncsi_report_link(ndp, false); return -ENODEV; } static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_channel *nc; struct ncsi_package *np; unsigned long flags; unsigned int n = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); /* Channels may be busy, mark dirty instead of * kicking if; * a) not ACTIVE (configured) * b) in the channel_queue (to be configured) * c) it's ndev is in the config state */ if (nc->state != NCSI_CHANNEL_ACTIVE) { if ((ndp->ndev.state & 0xff00) == ncsi_dev_state_config || !list_empty(&nc->link)) { netdev_dbg(nd->dev, "NCSI: channel %p marked dirty\n", nc); nc->reconfigure_needed = true; } spin_unlock_irqrestore(&nc->lock, flags); continue; } spin_unlock_irqrestore(&nc->lock, flags); 
ncsi_stop_channel_monitor(nc); spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(nd->dev, "NCSI: kicked channel %p\n", nc); n++; } } return n; } int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ncsi_dev_priv *ndp; unsigned int n_vids = 0; struct vlan_vid *vlan; struct ncsi_dev *nd; bool found = false; if (vid == 0) return 0; nd = ncsi_find_dev(dev); if (!nd) { netdev_warn(dev, "NCSI: No net_device?\n"); return 0; } ndp = TO_NCSI_DEV_PRIV(nd); /* Add the VLAN id to our internal list */ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { n_vids++; if (vlan->vid == vid) { netdev_dbg(dev, "NCSI: vid %u already registered\n", vid); return 0; } } if (n_vids >= NCSI_MAX_VLAN_VIDS) { netdev_warn(dev, "tried to add vlan id %u but NCSI max already registered (%u)\n", vid, NCSI_MAX_VLAN_VIDS); return -ENOSPC; } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) return -ENOMEM; vlan->proto = proto; vlan->vid = vid; list_add_rcu(&vlan->list, &ndp->vlan_vids); netdev_dbg(dev, "NCSI: Added new vid %u\n", vid); found = ncsi_kick_channels(ndp) != 0; return found ? ncsi_process_next_channel(ndp) : 0; } EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid); int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_vid *vlan, *tmp; struct ncsi_dev_priv *ndp; struct ncsi_dev *nd; bool found = false; if (vid == 0) return 0; nd = ncsi_find_dev(dev); if (!nd) { netdev_warn(dev, "NCSI: no net_device?\n"); return 0; } ndp = TO_NCSI_DEV_PRIV(nd); /* Remove the VLAN id from our internal list */ list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) if (vlan->vid == vid) { netdev_dbg(dev, "NCSI: vid %u found, removing\n", vid); list_del_rcu(&vlan->list); found = true; kfree(vlan); } if (!found) { netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid); return -EINVAL; } found = ncsi_kick_channels(ndp) != 0; return found ? 
ncsi_process_next_channel(ndp) : 0; } EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*handler)(struct ncsi_dev *ndev)) { struct ncsi_dev_priv *ndp; struct ncsi_dev *nd; struct platform_device *pdev; struct device_node *np; unsigned long flags; int i; /* Check if the device has been registered or not */ nd = ncsi_find_dev(dev); if (nd) return nd; /* Create NCSI device */ ndp = kzalloc(sizeof(*ndp), GFP_ATOMIC); if (!ndp) return NULL; nd = &ndp->ndev; nd->state = ncsi_dev_state_registered; nd->dev = dev; nd->handler = handler; ndp->pending_req_num = 0; INIT_LIST_HEAD(&ndp->channel_queue); INIT_LIST_HEAD(&ndp->vlan_vids); INIT_WORK(&ndp->work, ncsi_dev_work); ndp->package_whitelist = UINT_MAX; /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); } ndp->channel_count = NCSI_RESERVED_CHANNEL; spin_lock_irqsave(&ncsi_dev_lock, flags); list_add_tail_rcu(&ndp->node, &ncsi_dev_list); spin_unlock_irqrestore(&ncsi_dev_lock, flags); /* Register NCSI packet Rx handler */ ndp->ptype.type = cpu_to_be16(ETH_P_NCSI); ndp->ptype.func = ncsi_rcv_rsp; ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; if (np && (of_property_read_bool(np, "mellanox,multi-host") || of_property_read_bool(np, "mlx,multi-host"))) ndp->mlx_multi_host = true; } return nd; } EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) return -ENOTTY; if (!(ndp->flags & NCSI_DEV_PROBED)) { ndp->package_probe_id = 0; ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe; schedule_work(&ndp->work); return 0; } return ncsi_reset_dev(nd); } EXPORT_SYMBOL_GPL(ncsi_start_dev); void ncsi_stop_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np; struct ncsi_channel *nc; bool chained; int old_state; unsigned long flags; /* Stop the channel monitor on any active channels. Don't reset the * channel state so we know which were active when ncsi_start_dev() * is next called. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { ncsi_stop_channel_monitor(nc); spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; spin_unlock_irqrestore(&nc->lock, flags); WARN_ON_ONCE(chained || old_state == NCSI_CHANNEL_INVISIBLE); } } netdev_dbg(ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); int ncsi_reset_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_channel *nc, *active, *tmp; struct ncsi_package *np; unsigned long flags; spin_lock_irqsave(&ndp->lock, flags); if (!(ndp->flags & NCSI_DEV_RESET)) { /* Haven't been called yet, check states */ switch (nd->state & ncsi_dev_state_major) { case ncsi_dev_state_registered: case ncsi_dev_state_probe: /* Not even probed yet - do nothing */ spin_unlock_irqrestore(&ndp->lock, flags); return 0; case ncsi_dev_state_suspend: case ncsi_dev_state_config: /* Wait for the channel to finish its suspend/config * operation; once it finishes it will check for * NCSI_DEV_RESET and reset the state. 
*/ ndp->flags |= NCSI_DEV_RESET; spin_unlock_irqrestore(&ndp->lock, flags); return 0; } } else { switch (nd->state) { case ncsi_dev_state_suspend_done: case ncsi_dev_state_config_done: case ncsi_dev_state_functional: /* Ok */ break; default: /* Current reset operation happening */ spin_unlock_irqrestore(&ndp->lock, flags); return 0; } } if (!list_empty(&ndp->channel_queue)) { /* Clear any channel queue we may have interrupted */ list_for_each_entry_safe(nc, tmp, &ndp->channel_queue, link) list_del_init(&nc->link); } spin_unlock_irqrestore(&ndp->lock, flags); active = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); if (nc->state == NCSI_CHANNEL_ACTIVE) { active = nc; nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); break; } spin_unlock_irqrestore(&nc->lock, flags); } if (active) break; } if (!active) { /* Done */ spin_lock_irqsave(&ndp->lock, flags); ndp->flags &= ~NCSI_DEV_RESET; spin_unlock_irqrestore(&ndp->lock, flags); return ncsi_choose_active_channel(ndp); } spin_lock_irqsave(&ndp->lock, flags); ndp->flags |= NCSI_DEV_RESET; ndp->active_channel = active; ndp->active_package = active->package; spin_unlock_irqrestore(&ndp->lock, flags); nd->state = ncsi_dev_state_suspend; schedule_work(&ndp->work); return 0; } void ncsi_unregister_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np, *tmp; unsigned long flags; dev_remove_pack(&ndp->ptype); list_for_each_entry_safe(np, tmp, &ndp->packages, node) ncsi_remove_package(np); spin_lock_irqsave(&ncsi_dev_lock, flags); list_del_rcu(&ndp->node); spin_unlock_irqrestore(&ncsi_dev_lock, flags); disable_work_sync(&ndp->work); kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); |
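/*
 * Illustrative sketch only (not part of ncsi-manage.c): how a MAC driver
 * might consume the API exported above. The "example_nic" structure and
 * the example_* function names are hypothetical; the NCSI calls and the
 * fields used in the handler (nd->state, nd->link_up, nd->dev) are the
 * ones defined in this file.
 */
#include <linux/netdevice.h>
#include <net/ncsi.h>

struct example_nic {
	struct net_device *netdev;
	struct ncsi_dev *ndev;
};

/* Invoked from ncsi_report_link() once the NCSI state machine settles */
static void example_ncsi_handler(struct ncsi_dev *nd)
{
	struct example_nic *nic = netdev_priv(nd->dev);

	if (nd->state != ncsi_dev_state_functional)
		return;

	if (nd->link_up)
		netif_carrier_on(nic->netdev);
	else
		netif_carrier_off(nic->netdev);
}

static int example_nic_open(struct net_device *netdev)
{
	struct example_nic *nic = netdev_priv(netdev);

	/* Idempotent: an already-registered ncsi_dev is simply returned */
	nic->ndev = ncsi_register_dev(netdev, example_ncsi_handler);
	if (!nic->ndev)
		return -ENOMEM;

	/* First call probes packages/channels; later calls reset the device */
	return ncsi_start_dev(nic->ndev);
}

static int example_nic_stop(struct net_device *netdev)
{
	struct example_nic *nic = netdev_priv(netdev);

	/* Channel state is kept so the next ncsi_start_dev() can resume */
	ncsi_stop_dev(nic->ndev);
	return 0;
}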
438 421 221 222 437 425 425 423 425 443 439 420 19 425 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Serge Hallyn <serue@us.ibm.com> * Reiner Sailer <sailer@watson.ibm.com> * Mimi Zohar <zohar@us.ibm.com> * * File: ima_queue.c * Implements queues that store template measurements and * maintains aggregate over the stored measurements * in the pre-configured TPM PCR (if available). * The measurement list is append-only. No entry is * ever removed or changed during the boot-cycle. */ #include <linux/rculist.h> #include <linux/reboot.h> #include <linux/slab.h> #include "ima.h" #define AUDIT_CAUSE_LEN_MAX 32 /* pre-allocated array of tpm_digest structures to extend a PCR */ static struct tpm_digest *digests; LIST_HEAD(ima_measurements); /* list of all measurements */ #ifdef CONFIG_IMA_KEXEC static unsigned long binary_runtime_size; #else static unsigned long binary_runtime_size = ULONG_MAX; #endif /* key: inode (before secure-hashing a file) */ struct ima_h_table ima_htable = { .len = ATOMIC_LONG_INIT(0), .violations = ATOMIC_LONG_INIT(0), .queue[0 ... IMA_MEASURE_HTABLE_SIZE - 1] = HLIST_HEAD_INIT }; /* mutex protects atomicity of extending measurement list * and extending the TPM PCR aggregate. Since tpm_extend can take * long (and the tpm driver uses a mutex), we can't use the spinlock. */ static DEFINE_MUTEX(ima_extend_list_mutex); /* * Used internally by the kernel to suspend measurements. * Protected by ima_extend_list_mutex. */ static bool ima_measurements_suspended; /* lookup up the digest value in the hash table, and return the entry */ static struct ima_queue_entry *ima_lookup_digest_entry(u8 *digest_value, int pcr) { struct ima_queue_entry *qe, *ret = NULL; unsigned int key; int rc; key = ima_hash_key(digest_value); rcu_read_lock(); hlist_for_each_entry_rcu(qe, &ima_htable.queue[key], hnext) { rc = memcmp(qe->entry->digests[ima_hash_algo_idx].digest, digest_value, hash_digest_size[ima_hash_algo]); if ((rc == 0) && (qe->entry->pcr == pcr)) { ret = qe; break; } } rcu_read_unlock(); return ret; } /* * Calculate the memory required for serializing a single * binary_runtime_measurement list entry, which contains a * couple of variable length fields (e.g template name and data). 
*/ static int get_binary_runtime_size(struct ima_template_entry *entry) { int size = 0; size += sizeof(u32); /* pcr */ size += TPM_DIGEST_SIZE; size += sizeof(int); /* template name size field */ size += strlen(entry->template_desc->name); size += sizeof(entry->template_data_len); size += entry->template_data_len; return size; } /* ima_add_template_entry helper function: * - Add template entry to the measurement list and hash table, for * all entries except those carried across kexec. * * (Called with ima_extend_list_mutex held.) */ static int ima_add_digest_entry(struct ima_template_entry *entry, bool update_htable) { struct ima_queue_entry *qe; unsigned int key; qe = kmalloc(sizeof(*qe), GFP_KERNEL); if (qe == NULL) { pr_err("OUT OF MEMORY ERROR creating queue entry\n"); return -ENOMEM; } qe->entry = entry; INIT_LIST_HEAD(&qe->later); list_add_tail_rcu(&qe->later, &ima_measurements); atomic_long_inc(&ima_htable.len); if (update_htable) { key = ima_hash_key(entry->digests[ima_hash_algo_idx].digest); hlist_add_head_rcu(&qe->hnext, &ima_htable.queue[key]); } if (binary_runtime_size != ULONG_MAX) { int size; size = get_binary_runtime_size(entry); binary_runtime_size = (binary_runtime_size < ULONG_MAX - size) ? binary_runtime_size + size : ULONG_MAX; } return 0; } /* * Return the amount of memory required for serializing the * entire binary_runtime_measurement list, including the ima_kexec_hdr * structure. */ unsigned long ima_get_binary_runtime_size(void) { if (binary_runtime_size >= (ULONG_MAX - sizeof(struct ima_kexec_hdr))) return ULONG_MAX; else return binary_runtime_size + sizeof(struct ima_kexec_hdr); } static int ima_pcr_extend(struct tpm_digest *digests_arg, int pcr) { int result = 0; if (!ima_tpm_chip) return result; result = tpm_pcr_extend(ima_tpm_chip, pcr, digests_arg); if (result != 0) pr_err("Error Communicating to TPM chip, result: %d\n", result); return result; } /* * Add template entry to the measurement list and hash table, and * extend the pcr. * * On systems which support carrying the IMA measurement list across * kexec, maintain the total memory size required for serializing the * binary_runtime_measurements. */ int ima_add_template_entry(struct ima_template_entry *entry, int violation, const char *op, struct inode *inode, const unsigned char *filename) { u8 *digest = entry->digests[ima_hash_algo_idx].digest; struct tpm_digest *digests_arg = entry->digests; const char *audit_cause = "hash_added"; char tpm_audit_cause[AUDIT_CAUSE_LEN_MAX]; int audit_info = 1; int result = 0, tpmresult = 0; mutex_lock(&ima_extend_list_mutex); /* * Avoid appending to the measurement log when the TPM subsystem has * been shut down while preparing for system reboot. 
*/ if (ima_measurements_suspended) { audit_cause = "measurements_suspended"; audit_info = 0; result = -ENODEV; goto out; } if (!violation && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) { if (ima_lookup_digest_entry(digest, entry->pcr)) { audit_cause = "hash_exists"; result = -EEXIST; goto out; } } result = ima_add_digest_entry(entry, !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)); if (result < 0) { audit_cause = "ENOMEM"; audit_info = 0; goto out; } if (violation) /* invalidate pcr */ digests_arg = digests; tpmresult = ima_pcr_extend(digests_arg, entry->pcr); if (tpmresult != 0) { snprintf(tpm_audit_cause, AUDIT_CAUSE_LEN_MAX, "TPM_error(%d)", tpmresult); audit_cause = tpm_audit_cause; audit_info = 0; } out: mutex_unlock(&ima_extend_list_mutex); integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, filename, op, audit_cause, result, audit_info); return result; } int ima_restore_measurement_entry(struct ima_template_entry *entry) { int result = 0; mutex_lock(&ima_extend_list_mutex); result = ima_add_digest_entry(entry, 0); mutex_unlock(&ima_extend_list_mutex); return result; } static void ima_measurements_suspend(void) { mutex_lock(&ima_extend_list_mutex); ima_measurements_suspended = true; mutex_unlock(&ima_extend_list_mutex); } static int ima_reboot_notifier(struct notifier_block *nb, unsigned long action, void *data) { ima_measurements_suspend(); return NOTIFY_DONE; } static struct notifier_block ima_reboot_nb = { .notifier_call = ima_reboot_notifier, }; void __init ima_init_reboot_notifier(void) { register_reboot_notifier(&ima_reboot_nb); } int __init ima_init_digests(void) { u16 digest_size; u16 crypto_id; int i; if (!ima_tpm_chip) return 0; digests = kcalloc(ima_tpm_chip->nr_allocated_banks, sizeof(*digests), GFP_NOFS); if (!digests) return -ENOMEM; for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++) { digests[i].alg_id = ima_tpm_chip->allocated_banks[i].alg_id; digest_size = ima_tpm_chip->allocated_banks[i].digest_size; crypto_id = ima_tpm_chip->allocated_banks[i].crypto_id; /* for unmapped TPM algorithms digest is still a padded SHA1 */ if (crypto_id == HASH_ALGO__LAST) digest_size = SHA1_DIGEST_SIZE; memset(digests[i].digest, 0xff, digest_size); } return 0; } |
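/*
 * Illustrative userspace sketch (not part of ima_queue.c): walking one
 * record of the serialized layout that get_binary_runtime_size() accounts
 * for above -- a PCR number, a TPM_DIGEST_SIZE digest, a length-prefixed
 * template name and length-prefixed template data. It assumes the 20-byte
 * SHA1-padded digest field and native-endian u32 length fields; the
 * read_one_measurement() name is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_TPM_DIGEST_SIZE 20	/* padded SHA1, matching TPM_DIGEST_SIZE */

static int read_one_measurement(FILE *fp)
{
	uint8_t digest[EXAMPLE_TPM_DIGEST_SIZE];
	uint32_t pcr, name_len, data_len;
	char name[64];

	if (fread(&pcr, sizeof(pcr), 1, fp) != 1)
		return -1;				/* end of list */
	if (fread(digest, sizeof(digest), 1, fp) != 1)
		return -1;
	if (fread(&name_len, sizeof(name_len), 1, fp) != 1 ||
	    name_len >= sizeof(name))
		return -1;
	if (fread(name, 1, name_len, fp) != name_len)
		return -1;
	name[name_len] = '\0';
	if (fread(&data_len, sizeof(data_len), 1, fp) != 1)
		return -1;
	if (fseek(fp, (long)data_len, SEEK_CUR))	/* skip template data */
		return -1;

	printf("PCR %u: template \"%s\", %u bytes of template data\n",
	       (unsigned int)pcr, name, (unsigned int)data_len);
	return 0;
}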
12 7 5 1 1 5 6 1 14 14 10 4 14 2 12 1 1 1 13 13 1 1 1 8 2 8 2 10 3 8 7 1 3 2 1 1 1 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 | // SPDX-License-Identifier: GPL-2.0 #include <net/xsk_buff_pool.h> #include <net/xdp_sock.h> #include <net/xdp_sock_drv.h> #include "xsk_queue.h" #include "xdp_umem.h" #include "xsk.h" void xp_add_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { unsigned long flags; if (!xs->tx) return; spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); list_add_rcu(&xs->tx_list, &pool->xsk_tx_list); spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); } void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs) { unsigned long flags; if (!xs->tx) return; spin_lock_irqsave(&pool->xsk_tx_list_lock, flags); list_del_rcu(&xs->tx_list); spin_unlock_irqrestore(&pool->xsk_tx_list_lock, flags); } void 
xp_destroy(struct xsk_buff_pool *pool) { if (!pool) return; kvfree(pool->tx_descs); kvfree(pool->heads); kvfree(pool); } int xp_alloc_tx_descs(struct xsk_buff_pool *pool, struct xdp_sock *xs) { pool->tx_descs = kvcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); if (!pool->tx_descs) return -ENOMEM; return 0; } struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; struct xsk_buff_pool *pool; struct xdp_buff_xsk *xskb; u32 i, entries; entries = unaligned ? umem->chunks : 0; pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL); if (!pool) goto out; pool->heads = kvcalloc(umem->chunks, sizeof(*pool->heads), GFP_KERNEL); if (!pool->heads) goto out; if (xs->tx) if (xp_alloc_tx_descs(pool, xs)) goto out; pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; pool->heads_cnt = umem->chunks; pool->free_heads_cnt = umem->chunks; pool->headroom = umem->headroom; pool->chunk_size = umem->chunk_size; pool->chunk_shift = ffs(umem->chunk_size) - 1; pool->unaligned = unaligned; pool->frame_len = umem->chunk_size - umem->headroom - XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); spin_lock_init(&pool->cq_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; pool->cq = xs->cq_tmp; for (i = 0; i < pool->free_heads_cnt; i++) { xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; INIT_LIST_HEAD(&xskb->list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); } return pool; out: xp_destroy(pool); return NULL; } void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { u32 i; for (i = 0; i < pool->heads_cnt; i++) pool->heads[i].xdp.rxq = rxq; } EXPORT_SYMBOL(xp_set_rxq_info); void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc) { u32 i; for (i = 0; i < pool->heads_cnt; i++) { struct xdp_buff_xsk *xskb = &pool->heads[i]; memcpy(xskb->cb + desc->off, desc->src, desc->bytes); } } EXPORT_SYMBOL(xp_fill_cb); static void xp_disable_drv_zc(struct xsk_buff_pool *pool) { struct netdev_bpf bpf; int err; ASSERT_RTNL(); if (pool->umem->zc) { bpf.command = XDP_SETUP_XSK_POOL; bpf.xsk.pool = NULL; bpf.xsk.queue_id = pool->queue_id; err = pool->netdev->netdev_ops->ndo_bpf(pool->netdev, &bpf); if (err) WARN(1, "Failed to disable zero-copy!\n"); } } #define NETDEV_XDP_ACT_ZC (NETDEV_XDP_ACT_BASIC | \ NETDEV_XDP_ACT_REDIRECT | \ NETDEV_XDP_ACT_XSK_ZEROCOPY) int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *netdev, u16 queue_id, u16 flags) { bool force_zc, force_copy; struct netdev_bpf bpf; int err = 0; ASSERT_RTNL(); force_zc = flags & XDP_ZEROCOPY; force_copy = flags & XDP_COPY; if (force_zc && force_copy) return -EINVAL; if (xsk_get_pool_from_qid(netdev, queue_id)) return -EBUSY; pool->netdev = netdev; pool->queue_id = queue_id; err = xsk_reg_pool_at_qid(netdev, pool, queue_id); if (err) return err; if (flags & XDP_USE_SG) pool->umem->flags |= XDP_UMEM_SG_FLAG; if (flags & XDP_USE_NEED_WAKEUP) pool->uses_need_wakeup = true; /* Tx needs to be explicitly woken up the first time. Also * for supporting drivers that do not implement this * feature. 
They will always have to call sendto() or poll(). */ pool->cached_need_wakeup = XDP_WAKEUP_TX; dev_hold(netdev); if (force_copy) /* For copy-mode, we are done. */ return 0; if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) { err = -EOPNOTSUPP; goto err_unreg_pool; } if (netdev->xdp_zc_max_segs == 1 && (flags & XDP_USE_SG)) { err = -EOPNOTSUPP; goto err_unreg_pool; } if (dev_get_min_mp_channel_count(netdev)) { err = -EBUSY; goto err_unreg_pool; } bpf.command = XDP_SETUP_XSK_POOL; bpf.xsk.pool = pool; bpf.xsk.queue_id = queue_id; err = netdev->netdev_ops->ndo_bpf(netdev, &bpf); if (err) goto err_unreg_pool; if (!pool->dma_pages) { WARN(1, "Driver did not DMA map zero-copy buffers"); err = -EINVAL; goto err_unreg_xsk; } pool->umem->zc = true; pool->xdp_zc_max_segs = netdev->xdp_zc_max_segs; return 0; err_unreg_xsk: xp_disable_drv_zc(pool); err_unreg_pool: if (!force_zc) err = 0; /* fallback to copy mode */ if (err) { xsk_clear_pool_at_qid(netdev, queue_id); dev_put(netdev); } return err; } int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs, struct net_device *dev, u16 queue_id) { u16 flags; struct xdp_umem *umem = umem_xs->umem; /* One fill and completion ring required for each queue id. */ if (!pool->fq || !pool->cq) return -EINVAL; flags = umem->zc ? XDP_ZEROCOPY : XDP_COPY; if (umem_xs->pool->uses_need_wakeup) flags |= XDP_USE_NEED_WAKEUP; return xp_assign_dev(pool, dev, queue_id, flags); } void xp_clear_dev(struct xsk_buff_pool *pool) { if (!pool->netdev) return; xp_disable_drv_zc(pool); xsk_clear_pool_at_qid(pool->netdev, pool->queue_id); dev_put(pool->netdev); pool->netdev = NULL; } static void xp_release_deferred(struct work_struct *work) { struct xsk_buff_pool *pool = container_of(work, struct xsk_buff_pool, work); rtnl_lock(); xp_clear_dev(pool); rtnl_unlock(); if (pool->fq) { xskq_destroy(pool->fq); pool->fq = NULL; } if (pool->cq) { xskq_destroy(pool->cq); pool->cq = NULL; } xdp_put_umem(pool->umem, false); xp_destroy(pool); } void xp_get_pool(struct xsk_buff_pool *pool) { refcount_inc(&pool->users); } bool xp_put_pool(struct xsk_buff_pool *pool) { if (!pool) return false; if (refcount_dec_and_test(&pool->users)) { INIT_WORK(&pool->work, xp_release_deferred); schedule_work(&pool->work); return true; } return false; } static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool) { struct xsk_dma_map *dma_map; list_for_each_entry(dma_map, &pool->umem->xsk_dma_list, list) { if (dma_map->netdev == pool->netdev) return dma_map; } return NULL; } static struct xsk_dma_map *xp_create_dma_map(struct device *dev, struct net_device *netdev, u32 nr_pages, struct xdp_umem *umem) { struct xsk_dma_map *dma_map; dma_map = kzalloc(sizeof(*dma_map), GFP_KERNEL); if (!dma_map) return NULL; dma_map->dma_pages = kvcalloc(nr_pages, sizeof(*dma_map->dma_pages), GFP_KERNEL); if (!dma_map->dma_pages) { kfree(dma_map); return NULL; } dma_map->netdev = netdev; dma_map->dev = dev; dma_map->dma_pages_cnt = nr_pages; refcount_set(&dma_map->users, 1); list_add(&dma_map->list, &umem->xsk_dma_list); return dma_map; } static void xp_destroy_dma_map(struct xsk_dma_map *dma_map) { list_del(&dma_map->list); kvfree(dma_map->dma_pages); kfree(dma_map); } static void __xp_dma_unmap(struct xsk_dma_map *dma_map, unsigned long attrs) { dma_addr_t *dma; u32 i; for (i = 0; i < dma_map->dma_pages_cnt; i++) { dma = &dma_map->dma_pages[i]; if (*dma) { *dma &= ~XSK_NEXT_PG_CONTIG_MASK; dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); *dma = 0; } 
} xp_destroy_dma_map(dma_map); } void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs) { struct xsk_dma_map *dma_map; if (!pool->dma_pages) return; dma_map = xp_find_dma_map(pool); if (!dma_map) { WARN(1, "Could not find dma_map for device"); return; } if (refcount_dec_and_test(&dma_map->users)) __xp_dma_unmap(dma_map, attrs); kvfree(pool->dma_pages); pool->dma_pages = NULL; pool->dma_pages_cnt = 0; pool->dev = NULL; } EXPORT_SYMBOL(xp_dma_unmap); static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map) { u32 i; for (i = 0; i < dma_map->dma_pages_cnt - 1; i++) { if (dma_map->dma_pages[i] + PAGE_SIZE == dma_map->dma_pages[i + 1]) dma_map->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK; else dma_map->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK; } } static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map) { if (!pool->unaligned) { u32 i; for (i = 0; i < pool->heads_cnt; i++) { struct xdp_buff_xsk *xskb = &pool->heads[i]; u64 orig_addr; orig_addr = xskb->xdp.data_hard_start - pool->addrs - pool->headroom; xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, orig_addr); } } pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL); if (!pool->dma_pages) return -ENOMEM; pool->dev = dma_map->dev; pool->dma_pages_cnt = dma_map->dma_pages_cnt; memcpy(pool->dma_pages, dma_map->dma_pages, pool->dma_pages_cnt * sizeof(*pool->dma_pages)); return 0; } int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages) { struct xsk_dma_map *dma_map; dma_addr_t dma; int err; u32 i; dma_map = xp_find_dma_map(pool); if (dma_map) { err = xp_init_dma_info(pool, dma_map); if (err) return err; refcount_inc(&dma_map->users); return 0; } dma_map = xp_create_dma_map(dev, pool->netdev, nr_pages, pool->umem); if (!dma_map) return -ENOMEM; for (i = 0; i < dma_map->dma_pages_cnt; i++) { dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, attrs); if (dma_mapping_error(dev, dma)) { __xp_dma_unmap(dma_map, attrs); return -ENOMEM; } dma_map->dma_pages[i] = dma; } if (pool->unaligned) xp_check_dma_contiguity(dma_map); err = xp_init_dma_info(pool, dma_map); if (err) { __xp_dma_unmap(dma_map, attrs); return err; } return 0; } EXPORT_SYMBOL(xp_dma_map); static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool, u64 addr) { return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size); } static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr) { *addr = xp_unaligned_extract_addr(*addr); if (*addr >= pool->addrs_cnt || *addr + pool->chunk_size > pool->addrs_cnt || xp_addr_crosses_non_contig_pg(pool, *addr)) return false; return true; } static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr) { *addr = xp_aligned_extract_addr(pool, *addr); return *addr < pool->addrs_cnt; } static struct xdp_buff_xsk *xp_get_xskb(struct xsk_buff_pool *pool, u64 addr) { struct xdp_buff_xsk *xskb; if (pool->unaligned) { xskb = pool->free_heads[--pool->free_heads_cnt]; xp_init_xskb_addr(xskb, pool, addr); if (pool->dma_pages) xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); } else { xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; } return xskb; } static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb; u64 addr; bool ok; if (pool->free_heads_cnt == 0) return NULL; for (;;) { if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { pool->fq->queue_empty_descs++; return NULL; } ok = pool->unaligned ? 
xp_check_unaligned(pool, &addr) : xp_check_aligned(pool, &addr); if (!ok) { pool->fq->invalid_descs++; xskq_cons_release(pool->fq); continue; } break; } xskb = xp_get_xskb(pool, addr); xskq_cons_release(pool->fq); return xskb; } struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb; if (!pool->free_list_cnt) { xskb = __xp_alloc(pool); if (!xskb) return NULL; } else { pool->free_list_cnt--; xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node); list_del_init(&xskb->list_node); } xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM; xskb->xdp.data_meta = xskb->xdp.data; xskb->xdp.flags = 0; if (pool->dev) xp_dma_sync_for_device(pool, xskb->dma, pool->frame_len); return &xskb->xdp; } EXPORT_SYMBOL(xp_alloc); static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { u32 i, cached_cons, nb_entries; if (max > pool->free_heads_cnt) max = pool->free_heads_cnt; max = xskq_cons_nb_entries(pool->fq, max); cached_cons = pool->fq->cached_cons; nb_entries = max; i = max; while (i--) { struct xdp_buff_xsk *xskb; u64 addr; bool ok; __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr); ok = pool->unaligned ? xp_check_unaligned(pool, &addr) : xp_check_aligned(pool, &addr); if (unlikely(!ok)) { pool->fq->invalid_descs++; nb_entries--; continue; } xskb = xp_get_xskb(pool, addr); *xdp = &xskb->xdp; xdp++; } xskq_cons_release_n(pool->fq, max); return nb_entries; } static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries) { struct xdp_buff_xsk *xskb; u32 i; nb_entries = min_t(u32, nb_entries, pool->free_list_cnt); i = nb_entries; while (i--) { xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, list_node); list_del_init(&xskb->list_node); *xdp = &xskb->xdp; xdp++; } pool->free_list_cnt -= nb_entries; return nb_entries; } static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { int i; for (i = 0; i < max; i++) { struct xdp_buff *buff; buff = xp_alloc(pool); if (unlikely(!buff)) return i; *xdp = buff; xdp++; } return max; } u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) { u32 nb_entries1 = 0, nb_entries2; if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) return xp_alloc_slow(pool, xdp, max); if (unlikely(pool->free_list_cnt)) { nb_entries1 = xp_alloc_reused(pool, xdp, max); if (nb_entries1 == max) return nb_entries1; max -= nb_entries1; xdp += nb_entries1; } nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max); if (!nb_entries2) pool->fq->queue_empty_descs++; return nb_entries1 + nb_entries2; } EXPORT_SYMBOL(xp_alloc_batch); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count) { u32 req_count, avail_count; if (pool->free_list_cnt >= count) return true; req_count = count - pool->free_list_cnt; avail_count = xskq_cons_nb_entries(pool->fq, req_count); if (!avail_count) pool->fq->queue_empty_descs++; return avail_count >= req_count; } EXPORT_SYMBOL(xp_can_alloc); void xp_free(struct xdp_buff_xsk *xskb) { if (!list_empty(&xskb->list_node)) return; xskb->pool->free_list_cnt++; list_add(&xskb->list_node, &xskb->pool->free_list); } EXPORT_SYMBOL(xp_free); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr) { addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr; return pool->addrs + addr; } EXPORT_SYMBOL(xp_raw_get_data); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { addr = pool->unaligned ? 
xp_unaligned_add_offset_to_addr(addr) : addr; return (pool->dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + (addr & ~PAGE_MASK); } EXPORT_SYMBOL(xp_raw_get_dma); |
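/*
 * Hedged illustration (not part of the original file): the "next page is
 * contiguous" trick used by xp_check_dma_contiguity() and xp_raw_get_dma()
 * above.  Page DMA addresses are PAGE_SIZE aligned, so a low bit is free to
 * record whether page i + 1 immediately follows page i, letting a frame
 * cross a page boundary without an extra lookup.  All names below are local
 * stand-ins; EXAMPLE_NEXT_PG_CONTIG merely plays the role of
 * XSK_NEXT_PG_CONTIG_MASK (its real value is not assumed here).
 */
#define EXAMPLE_PAGE_SHIFT	12
#define EXAMPLE_PAGE_SIZE	(1UL << EXAMPLE_PAGE_SHIFT)
#define EXAMPLE_PAGE_MASK	(~(EXAMPLE_PAGE_SIZE - 1))
#define EXAMPLE_NEXT_PG_CONTIG	0x1UL

/* Mark each page whose successor is physically adjacent. */
static void example_mark_contiguity(unsigned long *pages, unsigned int n)
{
	unsigned int i;

	for (i = 0; i + 1 < n; i++) {
		if ((pages[i] & EXAMPLE_PAGE_MASK) + EXAMPLE_PAGE_SIZE ==
		    (pages[i + 1] & EXAMPLE_PAGE_MASK))
			pages[i] |= EXAMPLE_NEXT_PG_CONTIG;
		else
			pages[i] &= ~EXAMPLE_NEXT_PG_CONTIG;
	}
}

/* Resolve a buffer offset to a device address, as xp_raw_get_dma() does. */
static unsigned long example_addr_to_dma(const unsigned long *pages,
					 unsigned long addr)
{
	return (pages[addr >> EXAMPLE_PAGE_SHIFT] & EXAMPLE_PAGE_MASK) +
	       (addr & ~EXAMPLE_PAGE_MASK);
}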
// SPDX-License-Identifier: GPL-2.0-or-later /* * x86 instruction analysis * * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ #include <linux/kernel.h> #ifdef __KERNEL__ #include <linux/string.h> #else #include <string.h> #endif #include <asm/inat.h> /*__ignore_sync_check__ */
#include <asm/insn.h> /* __ignore_sync_check__ */ #include <linux/unaligned.h> /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> #include <asm/emulate_prefix.h> /* __ignore_sync_check__ */ #define leXX_to_cpu(t, r) \ ({ \ __typeof__(t) v; \ switch (sizeof(t)) { \ case 4: v = le32_to_cpu(r); break; \ case 2: v = le16_to_cpu(r); break; \ case 1: v = r; break; \ default: \ BUILD_BUG(); break; \ } \ v; \ }) /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) #define peek_nbyte_next(t, insn, n) \ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) #define peek_next(t, insn) peek_nbyte_next(t, insn, 0) /** * insn_init() - initialize struct insn * @insn: &struct insn to be initialized * @kaddr: address (in kernel memory) of instruction (or copy thereof) * @buf_len: length of the insn buffer at @kaddr * @x86_64: !0 for 64-bit kernel or 64-bit app */ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) { /* * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid * even if the input buffer is long enough to hold them. */ if (buf_len > MAX_INSN_SIZE) buf_len = MAX_INSN_SIZE; memset(insn, 0, sizeof(*insn)); insn->kaddr = kaddr; insn->end_kaddr = kaddr + buf_len; insn->next_byte = kaddr; insn->x86_64 = x86_64; insn->opnd_bytes = 4; if (x86_64) insn->addr_bytes = 8; else insn->addr_bytes = 4; } static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; static int __insn_get_emulate_prefix(struct insn *insn, const insn_byte_t *prefix, size_t len) { size_t i; for (i = 0; i < len; i++) { if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) goto err_out; } insn->emulate_prefix_size = len; insn->next_byte += len; return 1; err_out: return 0; } static void insn_get_emulate_prefix(struct insn *insn) { if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) return; __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); } /** * insn_get_prefixes - scan x86 instruction prefix bytes * @insn: &struct insn containing instruction * * Populates the @insn->prefixes bitmap, and updates @insn->next_byte * to point to the (first) opcode. No effect if @insn->prefixes.got * is already set. 
* * * Returns: * 0: on success * < 0: on error */ int insn_get_prefixes(struct insn *insn) { struct insn_field *prefixes = &insn->prefixes; insn_attr_t attr; insn_byte_t b, lb; int i, nb; if (prefixes->got) return 0; insn_get_emulate_prefix(insn); nb = 0; lb = 0; b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); while (inat_is_legacy_prefix(attr)) { /* Skip if same prefix */ for (i = 0; i < nb; i++) if (prefixes->bytes[i] == b) goto found; if (nb == 4) /* Invalid instruction */ break; prefixes->bytes[nb++] = b; if (inat_is_address_size_prefix(attr)) { /* address size switches 2/4 or 4/8 */ if (insn->x86_64) insn->addr_bytes ^= 12; else insn->addr_bytes ^= 6; } else if (inat_is_operand_size_prefix(attr)) { /* oprand size switches 2/4 */ insn->opnd_bytes ^= 6; } found: prefixes->nbytes++; insn->next_byte++; lb = b; b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); } /* Set the last prefix */ if (lb && lb != insn->prefixes.bytes[3]) { if (unlikely(insn->prefixes.bytes[3])) { /* Swap the last prefix */ b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) insn_set_byte(prefixes, i, b); } insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ if (insn->x86_64) { b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ insn->opnd_bytes = 8; } else if (inat_is_rex2_prefix(attr)) { insn_set_byte(&insn->rex_prefix, 0, b); b = peek_nbyte_next(insn_byte_t, insn, 1); insn_set_byte(&insn->rex_prefix, 1, b); insn->rex_prefix.nbytes = 2; insn->next_byte += 2; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ insn->opnd_bytes = 8; insn->rex_prefix.got = 1; goto vex_end; } } insn->rex_prefix.got = 1; /* Decode VEX prefix */ b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_vex_prefix(attr)) { insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); if (!insn->x86_64) { /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is * LDS or LES or BOUND. */ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn_set_byte(&insn->vex_prefix, 0, b); insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) /* VEX.W overrides opnd_size */ insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) /* VEX.W overrides opnd_size */ insn->opnd_bytes = 8; } else { /* * For VEX2, fake VEX3-like byte#2. * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } } vex_end: insn->vex_prefix.got = 1; prefixes->got = 1; return 0; err_out: return -ENODATA; } /** * insn_get_opcode - collect opcode(s) * @insn: &struct insn containing instruction * * Populates @insn->opcode, updates @insn->next_byte to point past the * opcode byte(s), and set @insn->attr (except for groups). * If necessary, first collects any preceding (prefix) bytes. 
* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got * is already 1. * * Returns: * 0: on success * < 0: on error */ int insn_get_opcode(struct insn *insn) { struct insn_field *opcode = &insn->opcode; int pfx_id, ret; insn_byte_t op; if (opcode->got) return 0; ret = insn_get_prefixes(insn); if (ret) return ret; /* Get first opcode */ op = get_next(insn_byte_t, insn); insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ if (insn_is_avx(insn)) { insn_byte_t m, p; m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); /* SCALABLE EVEX uses p bits to encode operand size */ if (inat_evex_scalable(insn->attr) && !insn_vex_w_bit(insn) && p == INAT_PFX_OPNDSZ) insn->opnd_bytes = 2; if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))) { /* This instruction is bad */ insn->attr = 0; return -EINVAL; } /* VEX has only 1 byte for opcode */ goto end; } /* Check if there is REX2 prefix or not */ if (insn_is_rex2(insn)) { if (insn_rex2_m_bit(insn)) { /* map 1 is escape 0x0f */ insn_attr_t esc_attr = inat_get_opcode_attribute(0x0f); pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, esc_attr); } else { insn->attr = inat_get_opcode_attribute(op); } goto end; } insn->attr = inat_get_opcode_attribute(op); while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ op = get_next(insn_byte_t, insn); opcode->bytes[opcode->nbytes++] = op; pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); } if (inat_must_vex(insn->attr)) { /* This instruction is bad */ insn->attr = 0; return -EINVAL; } end: opcode->got = 1; return 0; err_out: return -ENODATA; } /** * insn_get_modrm - collect ModRM byte, if any * @insn: &struct insn containing instruction * * Populates @insn->modrm and updates @insn->next_byte to point past the * ModRM byte, if any. If necessary, first collects the preceding bytes * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. * * Returns: * 0: on success * < 0: on error */ int insn_get_modrm(struct insn *insn) { struct insn_field *modrm = &insn->modrm; insn_byte_t pfx_id, mod; int ret; if (modrm->got) return 0; ret = insn_get_opcode(insn); if (ret) return ret; if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, insn->attr); if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) { /* Bad insn */ insn->attr = 0; return -EINVAL; } } } if (insn->x86_64 && inat_is_force64(insn->attr)) insn->opnd_bytes = 8; modrm->got = 1; return 0; err_out: return -ENODATA; } /** * insn_rip_relative() - Does instruction use RIP-relative addressing mode? * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * ModRM byte. No effect if @insn->x86_64 is 0. */ int insn_rip_relative(struct insn *insn) { struct insn_field *modrm = &insn->modrm; int ret; if (!insn->x86_64) return 0; ret = insn_get_modrm(insn); if (ret) return 0; /* * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. 
*/ return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** * insn_get_sib() - Get the SIB byte of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * ModRM byte. * * Returns: * 0: if decoding succeeded * < 0: otherwise. */ int insn_get_sib(struct insn *insn) { insn_byte_t modrm; int ret; if (insn->sib.got) return 0; ret = insn_get_modrm(insn); if (ret) return ret; if (insn->modrm.nbytes) { modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { insn_field_set(&insn->sib, get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; return 0; err_out: return -ENODATA; } /** * insn_get_displacement() - Get the displacement of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * SIB byte. * Displacement value is sign-expanded. * * * Returns: * 0: if decoding succeeded * < 0: otherwise. */ int insn_get_displacement(struct insn *insn) { insn_byte_t mod, rm, base; int ret; if (insn->displacement.got) return 0; ret = insn_get_sib(insn); if (ret) return ret; if (insn->modrm.nbytes) { /* * Interpreting the modrm byte: * mod = 00 - no displacement fields (exceptions below) * mod = 01 - 1-byte displacement field * mod = 10 - displacement field is 4 bytes, or 2 bytes if * address size = 2 (0x67 prefix in 32-bit mode) * mod = 11 - no memory operand * * If address size = 2... * mod = 00, r/m = 110 - displacement field is 2 bytes * * If address size != 2... * mod != 11, r/m = 100 - SIB byte exists * mod = 00, SIB base = 101 - displacement field is 4 bytes * mod = 00, r/m = 101 - rip-relative addressing, displacement * field is 4 bytes */ mod = X86_MODRM_MOD(insn->modrm.value); rm = X86_MODRM_RM(insn->modrm.value); base = X86_SIB_BASE(insn->sib.value); if (mod == 3) goto out; if (mod == 1) { insn_field_set(&insn->displacement, get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { insn_field_set(&insn->displacement, get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { insn_field_set(&insn->displacement, get_next(int, insn), 4); } } } out: insn->displacement.got = 1; return 0; err_out: return -ENODATA; } /* Decode moffset16/32/64. Return 0 if failed */ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: insn_field_set(&insn->moffset1, get_next(int, insn), 4); insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; } insn->moffset1.got = insn->moffset2.got = 1; return 1; err_out: return 0; } /* Decode imm v32(Iz). 
Return 0 if failed */ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; } return 1; err_out: return 0; } /* Decode imm v64(Iv/Ov), Return 0 if failed */ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; } insn->immediate1.got = insn->immediate2.got = 1; return 1; err_out: return 0; } /* Decode ptr16:16/32(Ap) */ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ return 0; default: /* opnd_bytes must be modified manually */ goto err_out; } insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; err_out: return 0; } /** * insn_get_immediate() - Get the immediate in an instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * displacement bytes. * Basically, most of immediates are sign-expanded. Unsigned-value can be * computed by bit masking with ((1 << (nbytes * 8)) - 1) * * Returns: * 0: on success * < 0: on error */ int insn_get_immediate(struct insn *insn) { int ret; if (insn->immediate.got) return 0; ret = insn_get_displacement(insn); if (ret) return ret; if (inat_has_moffset(insn->attr)) { if (!__get_moffset(insn)) goto err_out; goto done; } if (!inat_has_immediate(insn->attr)) /* no immediates */ goto done; switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) goto err_out; break; case INAT_IMM_VWORD32: if (!__get_immv32(insn)) goto err_out; break; case INAT_IMM_VWORD: if (!__get_immv(insn)) goto err_out; break; default: /* Here, insn must have an immediate, but failed */ goto err_out; } if (inat_has_second_immediate(insn->attr)) { insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; return 0; err_out: return -ENODATA; } /** * insn_get_length() - Get the length of instruction * @insn: &struct insn containing instruction * * If necessary, first collects the instruction up to and including the * immediates bytes. 
* * Returns: * - 0 on success * - < 0 on error */ int insn_get_length(struct insn *insn) { int ret; if (insn->length) return 0; ret = insn_get_immediate(insn); if (ret) return ret; insn->length = (unsigned char)((unsigned long)insn->next_byte - (unsigned long)insn->kaddr); return 0; } /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { return insn->opcode.got && insn->modrm.got && insn->sib.got && insn->displacement.got && insn->immediate.got; } /** * insn_decode() - Decode an x86 instruction * @insn: &struct insn to be initialized * @kaddr: address (in kernel memory) of instruction (or copy thereof) * @buf_len: length of the insn buffer at @kaddr * @m: insn mode, see enum insn_mode * * Returns: * 0: if decoding succeeded * < 0: otherwise. */ int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m) { int ret; /* #define INSN_MODE_KERN -1 __ignore_sync_check__ mode is only valid in the kernel */ if (m == INSN_MODE_KERN) insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64)); else insn_init(insn, kaddr, buf_len, m == INSN_MODE_64); ret = insn_get_length(insn); if (ret) return ret; if (insn_complete(insn)) return 0; return -EINVAL; } |
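/*
 * Hedged usage sketch (not part of the original file): how a caller might
 * drive the decoder defined above.  It assumes a context where the
 * <asm/insn.h> declarations are visible; the byte pattern and the
 * pr_info() reporting are purely illustrative.
 */
static void example_decode_one(void)
{
	/* 48 89 e5 is "mov %rsp,%rbp" in 64-bit mode. */
	const unsigned char buf[] = { 0x48, 0x89, 0xe5 };
	struct insn insn;

	if (insn_decode(&insn, buf, sizeof(buf), INSN_MODE_64))
		return;		/* could not decode the buffer */

	/* Expect length 3, opcode byte 0x89, ModRM byte 0xe5. */
	pr_info("len=%d opcode=0x%02x modrm=0x%02x\n",
		insn.length, insn.opcode.bytes[0], insn.modrm.bytes[0]);
}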
1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H #include <linux/mm_types_task.h> #include <linux/auxvec.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/rbtree.h> #include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/uprobes.h> #include <linux/rcupdate.h> #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> #include <linux/percpu_counter.h> #include <asm/mmu.h> #ifndef AT_VECTOR_SIZE_ARCH #define AT_VECTOR_SIZE_ARCH 0 #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) #define INIT_PASID 0 struct address_space; struct mem_cgroup; /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using * a page, though if it is a pagecache page, rmap structures can tell us * who is mapping it. * * If you allocate the page using alloc_pages(), you can use some of the * space in struct page for your own purposes. The five words in the main * union are available, except for bit 0 of the first word which must be * kept clear. Many users use this word to store a pointer to an object * which is guaranteed to be aligned. If you use the same storage as * page->mapping, you must restore it to NULL before freeing the page. * * The mapcount field must not be used for own purposes. * * If you want to use the refcount field, it must be used in such a way * that other CPUs temporarily incrementing and then decrementing the * refcount does not cause problems. On receiving the page from * alloc_pages(), the refcount will be positive. * * If you allocate pages of order > 0, you can use some of the fields * in each subpage, but you may need to restore some of their values * afterwards. * * SLUB uses cmpxchg_double() to atomically update its freelist and counters. * That requires that freelist & counters in struct slab be adjacent and * double-word aligned. Because struct slab currently just reinterprets the * bits of struct page, we align all struct pages to double-word boundaries, * and ensure that 'freelist' is aligned within struct slab. */ #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else #define _struct_page_alignment __aligned(sizeof(unsigned long)) #endif struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. * WARNING: bit 0 of the first word is used for PageTail(). That * means the other users of this union MUST NOT use the bit to * avoid collision and false-positive PageTail(). */ union { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ union { struct list_head lru; /* Or, for the Unevictable "LRU list" slot */ struct { /* Always even, to negate PageTail */ void *__filler; /* Count page's or folio's mlocks */ unsigned int mlock_count; }; /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { pgoff_t index; /* Our offset within mapping. 
*/ unsigned long share; /* share count for fsdax */ }; /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if swapcache flag set. * Indicates order in the buddy system if PageBuddy. */ unsigned long private; }; struct { /* page_pool used by netstack */ /** * @pp_magic: magic value to avoid recycling non * page_pool allocated pages. */ unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being * mapped so the next 3 words hold the mapping, index, * and private fields from the source anonymous or * page cache page while the page is migrated to device * private memory. * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also * use the mapping, index, and private fields when * pmem backed DAX files are mapped. */ }; /** @rcu_head: You can use this to free a page by RCU. */ struct rcu_head rcu_head; }; union { /* This union is 4 bytes in size. */ /* * For head pages of typed folios, the value stored here * allows for determining what this page is used for. The * tail pages of typed folios will not store a type * (page_type == _mapcount == -1). * * See page-flags.h for a list of page types which are currently * stored here. * * Owners of typed folios may reuse the lower 16 bit of the * head page page_type field after setting the page type, * but must reset these 16 bit to -1 before clearing the * page type. */ unsigned int page_type; /* * For pages that are part of non-typed folios for which mappings * are tracked via the RMAP, encodes the number of times this page * is directly referenced by a page table. * * Note that the mapcount is always initialized to -1, so that * transitions both from it and to it can be tracked, using * atomic_inc_and_test() and atomic_add_negative(-1). */ atomic_t _mapcount; }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #elif defined(CONFIG_SLAB_OBJ_EXT) unsigned long _unused_slab_obj_exts; #endif /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with * highmem some memory is mapped into kernel virtual memory * dynamically, so we need a place to store that address. * Note that this field could be 16 bits on x86 ... ;) * * Architectures with slow multiplication can define * WANT_PAGE_VIRTUAL in asm/page.h */ #if defined(WANT_PAGE_VIRTUAL) void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: * - shadow page: every bit indicates whether the corresponding * bit of the original page is initialized (0) or not (1); * - origin page: every 4 bytes contain an id of the stack trace * where the uninitialized value was created. */ struct page *kmsan_shadow; struct page *kmsan_origin; #endif } _struct_page_alignment; /* * struct encoded_page - a nonexistent type marking this pointer * * An 'encoded_page' pointer is a pointer to a regular 'struct page', but * with the low bits of the pointer indicating extra context-dependent * information. 
Only used in mmu_gather handling, and this acts as a type * system check on that use. * * We only really have two guaranteed bits in general, although you could * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) * for more. * * Use the supplied helper functions to endcode/decode the pointer and bits. */ struct encoded_page; #define ENCODED_PAGE_BITS 3ul /* Perform rmap removal after we have flushed the TLB. */ #define ENCODED_PAGE_BIT_DELAY_RMAP 1ul /* * The next item in an encoded_page array is the "nr_pages" argument, specifying * the number of consecutive pages starting from this page, that all belong to * the same folio. For example, "nr_pages" corresponds to the number of folio * references that must be dropped. If this bit is not set, "nr_pages" is * implicitly 1. */ #define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); return (struct encoded_page *)(flags | (unsigned long)page); } static inline unsigned long encoded_page_flags(struct encoded_page *page) { return ENCODED_PAGE_BITS & (unsigned long)page; } static inline struct page *encoded_page_ptr(struct encoded_page *page) { return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); } static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) { VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); return (struct encoded_page *)(nr << 2); } static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) { return ((unsigned long)page) >> 2; } /* * A swap entry has to fit into a "unsigned long", as the entry is hidden * in the "index" field of the swapper address space. */ typedef struct { unsigned long val; } swp_entry_t; /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. * @lru: Least Recently Used list; tracks how recently this folio was used. * @mlock_count: Number of times this folio has been pinned by mlock(). * @mapping: The file this page belongs to, or refers to the anon_vma for * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. * @private: Filesystem per-folio data (see folio_attach_private()). * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to * find out how many times this folio is mapped by userspace. * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_large_mapcount: Do not use directly, call folio_mapcount(). * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). * @_deferred_list: Folios to be split under memory pressure. 
* @_unused_slab_obj_exts: Placeholder to match obj_exts in struct slab. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that * same power-of-two. It is at least as large as %PAGE_SIZE. If it is * in the page cache, it is at a file offset which is a multiple of that * power-of-two. It may be mapped into userspace at an address which is * at an arbitrary page offset, but its kernel virtual address is aligned * to its size. */ struct folio { /* private: don't document the anon union */ union { struct { /* public: */ unsigned long flags; union { struct list_head lru; /* private: avoid cluttering the output */ struct { void *__filler; /* public: */ unsigned int mlock_count; /* private: */ }; /* public: */ }; struct address_space *mapping; pgoff_t index; union { void *private; swp_entry_t swap; }; atomic_t _mapcount; atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #elif defined(CONFIG_SLAB_OBJ_EXT) unsigned long _unused_slab_obj_exts; #endif #if defined(WANT_PAGE_VIRTUAL) void *virtual; #endif #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif /* private: the union with struct page is transitional */ }; struct page page; }; union { struct { unsigned long _flags_1; unsigned long _head_1; /* public: */ atomic_t _large_mapcount; atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif /* private: the union with struct page is transitional */ }; struct page __page_1; }; union { struct { unsigned long _flags_2; unsigned long _head_2; /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; struct { unsigned long _flags_2a; unsigned long _head_2a; /* public: */ struct list_head _deferred_list; /* private: the union with struct page is transitional */ }; struct page __page_2; }; }; #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); FOLIO_MATCH(_refcount, _refcount); #ifdef CONFIG_MEMCG FOLIO_MATCH(memcg_data, memcg_data); #endif #if defined(WANT_PAGE_VIRTUAL) FOLIO_MATCH(virtual, virtual); #endif #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS FOLIO_MATCH(_last_cpupid, _last_cpupid); #endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); FOLIO_MATCH(flags, _flags_2a); FOLIO_MATCH(compound_head, _head_2a); #undef FOLIO_MATCH /** * struct ptdesc - Memory descriptor for page tables. * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 gmap shadow pages * (which are not linked into the user page tables) and x86 * pgds. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. 
Unused for page tables. * @pt_index: Used for s390 gmap. * @pt_mm: Used for x86 pgds. * @pt_frag_refcount: For fragmented page table tracking. Powerpc only. * @pt_share_count: Used for HugeTLB PMD page table share count. * @_pt_pad_2: Padding to ensure proper alignment. * @ptl: Lock for the page table. * @__page_type: Same as page->page_type. Unused for page tables. * @__page_refcount: Same as page refcount. * @pt_memcg_data: Memcg data. Tracked for page tables here. * * This struct overlays struct page for now. Do not modify without a good * understanding of the issues. */ struct ptdesc { unsigned long __page_flags; union { struct rcu_head pt_rcu_head; struct list_head pt_list; struct { unsigned long _pt_pad_1; pgtable_t pmd_huge_pte; }; }; unsigned long __page_mapping; union { pgoff_t pt_index; struct mm_struct *pt_mm; atomic_t pt_frag_refcount; #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING atomic_t pt_share_count; #endif }; union { unsigned long _pt_pad_2; #if ALLOC_SPLIT_PTLOCKS spinlock_t *ptl; #else spinlock_t ptl; #endif }; unsigned int __page_type; atomic_t __page_refcount; #ifdef CONFIG_MEMCG unsigned long pt_memcg_data; #endif }; #define TABLE_MATCH(pg, pt) \ static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); TABLE_MATCH(index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); #ifdef CONFIG_MEMCG TABLE_MATCH(memcg_data, pt_memcg_data); #endif #undef TABLE_MATCH static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); #define ptdesc_page(pt) (_Generic((pt), \ const struct ptdesc *: (const struct page *)(pt), \ struct ptdesc *: (struct page *)(pt))) #define ptdesc_folio(pt) (_Generic((pt), \ const struct ptdesc *: (const struct folio *)(pt), \ struct ptdesc *: (struct folio *)(pt))) #define page_ptdesc(p) (_Generic((p), \ const struct page *: (const struct ptdesc *)(p), \ struct page *: (struct ptdesc *)(p))) #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc) { atomic_set(&ptdesc->pt_share_count, 0); } static inline void ptdesc_pmd_pts_inc(struct ptdesc *ptdesc) { atomic_inc(&ptdesc->pt_share_count); } static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc) { atomic_dec(&ptdesc->pt_share_count); } static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc) { return atomic_read(&ptdesc->pt_share_count); } #else static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc) { } #endif /* * Used for sizing the vmemmap region on some architectures */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) /* * page_private can be used on tail pages. However, PagePrivate is only * checked by the VM on the head page. So page_private on the tail pages * should be used for data that's ancillary to the head page (eg attaching * buffer heads to tail pages after attaching buffer heads to the head page) */ #define page_private(page) ((page)->private) static inline void set_page_private(struct page *page, unsigned long private) { page->private = private; } static inline void *folio_get_private(struct folio *folio) { return folio->private; } typedef unsigned long vm_flags_t; /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that * map parts of them. 
*/ struct vm_region { struct rb_node vm_rb; /* link in global region tree */ vm_flags_t vm_flags; /* VMA vm_flags */ unsigned long vm_start; /* start address of region */ unsigned long vm_end; /* region initialised to here */ unsigned long vm_top; /* region allocated to here */ unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; #ifdef CONFIG_USERFAULTFD #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) struct vm_userfaultfd_ctx { struct userfaultfd_ctx *ctx; }; #else /* CONFIG_USERFAULTFD */ #define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ struct anon_vma_name { struct kref kref; /* The name needs to be at the end because it is dynamically sized. */ char name[]; }; #ifdef CONFIG_ANON_VMA_NAME /* * mmap_lock should be read-locked when calling anon_vma_name(). Caller should * either keep holding the lock while using the returned pointer or it should * raise anon_vma_name refcount before releasing the lock. */ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); struct anon_vma_name *anon_vma_name_alloc(const char *name); void anon_vma_name_free(struct kref *kref); #else /* CONFIG_ANON_VMA_NAME */ static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { return NULL; } static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) { return NULL; } #endif struct vma_lock { struct rw_semaphore lock; }; struct vma_numab_state { /* * Initialised as time in 'jiffies' after which VMA * should be scanned. Delays first scan of new VMA by at * least sysctl_numa_balancing_scan_delay: */ unsigned long next_scan; /* * Time in jiffies when pids_active[] is reset to * detect phase change behaviour: */ unsigned long pids_active_reset; /* * Approximate tracking of PIDs that trapped a NUMA hinting * fault. May produce false positives due to hash collisions. * * [0] Previous PID tracking * [1] Current PID tracking * * Window moves after next_pid_reset has expired approximately * every VMA_PID_RESET_PERIOD jiffies: */ unsigned long pids_active[2]; /* MM scan sequence ID when scan first started after VMA creation */ int start_scan_seq; /* * MM scan sequence ID when the VMA was last completely scanned. * A VMA is not eligible for scanning if prev_scan_seq == numa_scan_seq */ int prev_scan_seq; }; /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). * * Only explicitly marked struct members may be accessed by RCU readers before * getting a stable reference. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ union { struct { /* VMA covers [vm_start; vm_end) addresses within mm */ unsigned long vm_start; unsigned long vm_end; }; #ifdef CONFIG_PER_VMA_LOCK struct rcu_head vm_rcu; /* Used for deferred freeing. */ #endif }; /* * The address space we belong to. * Unstable RCU readers are allowed to read this. */ struct mm_struct *vm_mm; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ /* * Flags, see mm.h. * To modify use vm_flags_{init|reset|set|clear|mod} functions. 
*/ union { const vm_flags_t vm_flags; vm_flags_t __private __vm_flags; }; #ifdef CONFIG_PER_VMA_LOCK /* * Flag to indicate areas detached from the mm->mm_mt tree. * Unstable RCU readers are allowed to read this. */ bool detached; /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) * - vm_lock->lock (in write mode) * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) * - vm_lock->lock (in read or write mode) * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * * This sequence counter is explicitly allowed to overflow; sequence * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ unsigned int vm_lock_seq; /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif /* * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. * */ struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ struct list_head anon_vma_chain; /* Serialized by mmap_lock & * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ /* Function pointers to deal with this struct. */ const struct vm_operations_struct *vm_ops; /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ #ifdef CONFIG_ANON_VMA_NAME /* * For private and shared anonymous mappings, a pointer to a null * terminated string containing the name given to the vma, or NULL if * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. */ struct anon_vma_name *anon_name; #endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif #ifndef CONFIG_MMU struct vm_region *vm_region; /* NOMMU mapping region */ #endif #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; #ifdef CONFIG_NUMA #define vma_policy(vma) ((vma)->vm_policy) #else #define vma_policy(vma) NULL #endif #ifdef CONFIG_SCHED_MM_CID struct mm_cid { u64 time; int cid; int recent_cid; }; #endif struct kioctx_table; struct iommu_mm_data; struct mm_struct { struct { /* * Fields which are often written to are placed in a separate * cache line. */ struct { /** * @mm_count: The number of references to &struct * mm_struct (@mm_users count as 1). * * Use mmgrab()/mmdrop() to modify. When this drops to * 0, the &struct mm_struct is freed. */ atomic_t mm_count; } ____cacheline_aligned_in_smp; struct maple_tree mm_mt; unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* Base addresses for compatible mmap() */ unsigned long mmap_compat_base; unsigned long mmap_compat_legacy_base; #endif unsigned long task_size; /* size of task vm space */ pgd_t * pgd; #ifdef CONFIG_MEMBARRIER /** * @membarrier_state: Flags controlling membarrier behavior. 
* * This field is close to @pgd to hopefully fit in the same * cache-line, which needs to be touched by switch_mm(). */ atomic_t membarrier_state; #endif /** * @mm_users: The number of users including userspace. * * Use mmget()/mmget_not_zero()/mmput() to modify. When this * drops to 0 (i.e. when the task exits and there are no other * temporary reference holders), we also release a reference on * @mm_count (which may then free the &struct mm_struct if * @mm_count also drops to 0). */ atomic_t mm_users; #ifdef CONFIG_SCHED_MM_CID /** * @pcpu_cid: Per-cpu current cid. * * Keep track of the currently allocated mm_cid for each cpu. * The per-cpu mm_cid values are serialized by their respective * runqueue locks. */ struct mm_cid __percpu *pcpu_cid; /* * @mm_cid_next_scan: Next mm_cid scan (in jiffies). * * When the next mm_cid scan is due (in jiffies). */ unsigned long mm_cid_next_scan; /** * @nr_cpus_allowed: Number of CPUs allowed for mm. * * Number of CPUs allowed in the union of all mm's * threads allowed CPUs. */ unsigned int nr_cpus_allowed; /** * @max_nr_cid: Maximum number of allowed concurrency * IDs allocated. * * Track the highest number of allowed concurrency IDs * allocated for the mm. */ atomic_t max_nr_cid; /** * @cpus_allowed_lock: Lock protecting mm cpus_allowed. * * Provide mutual exclusion for mm cpus_allowed and * mm nr_cpus_allowed updates. */ raw_spinlock_t cpus_allowed_lock; #endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ #endif int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some * counters */ /* * With some kernel config, the current mmap_lock's offset * inside 'mm_struct' is at 0x120, which is very optimal, as * its two hot fields 'count' and 'owner' sit in 2 different * cachelines, and when mmap_lock is highly contended, both * of the 2 fields will be accessed frequently, current layout * will help to reduce cache bouncing. * * So please be careful with adding new fields before * mmap_lock, which can easily push the 2 fields into one * cacheline. */ struct rw_semaphore mmap_lock; struct list_head mmlist; /* List of maybe swapped mm's. These * are globally strung together off * init_mm.mmlist, and are protected * by mmlist_lock */ #ifdef CONFIG_PER_VMA_LOCK /* * This field has lock-like semantics, meaning it is sometimes * accessed with ACQUIRE/RELEASE semantics. * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. * Incremented every time mmap_lock is write-locked/unlocked. * Initialized to 0, therefore odd values indicate mmap_lock * is write-locked and even values that it's released. * * Can be modified under write mmap_lock using RELEASE * semantics. * Can be read with no other protection when holding write * mmap_lock. * Can be read with ACQUIRE semantics if not holding write * mmap_lock. 
*/ seqcount_t mm_lock_seq; #endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ unsigned long total_vm; /* Total pages mapped */ unsigned long locked_vm; /* Pages that have PG_mlocked set */ atomic64_t pinned_vm; /* Refcount permanently increased */ unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags; /** * @write_protect_seq: Locked when any thread is write * protecting pages mapped by this mm to enforce a later COW, * for instance during page table copying for fork(). */ seqcount_t write_protect_seq; spinlock_t arg_lock; /* protect the below fields */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; /* Architecture-specific MM context */ mm_context_t context; unsigned long flags; /* Must use atomic bitops to access */ #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif #ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. All of the following must be true in * order for it to be changed: * * current == mm->owner * current->mm != mm * new_owner->mm == mm * new_owner->alloc_lock is held */ struct task_struct __rcu *owner; #endif struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING /* * numa_next_scan is the next time that PTEs will be remapped * PROT_NONE to trigger NUMA hinting faults; such faults gather * statistics and migrate pages to new nodes if necessary. */ unsigned long numa_next_scan; /* Restart point for scanning and remapping PTEs. */ unsigned long numa_scan_offset; /* numa_scan_seq prevents two threads remapping PTEs. */ int numa_scan_seq; #endif /* * An operation with batched TLB flushing is going on. Anything * that can move process memory needs to flush the TLB when * moving a PROT_NONE mapped page. */ atomic_t tlb_flush_pending; #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ atomic_t tlb_flush_batched; #endif struct uprobes_state uprobes_state; #ifdef CONFIG_PREEMPT_RT struct rcu_head delayed_drop; #endif #ifdef CONFIG_HUGETLB_PAGE atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; #ifdef CONFIG_IOMMU_MM_DATA struct iommu_mm_data *iommu_mm; #endif #ifdef CONFIG_KSM /* * Represent how many pages of this process are involved in KSM * merging (not including ksm_zero_pages). */ unsigned long ksm_merging_pages; /* * Represent how many pages are checked for ksm merging * including merged and not merged. */ unsigned long ksm_rmap_items; /* * Represent how many empty pages are merged with kernel zero * pages when enabling KSM use_zero_pages. 
*/ atomic_long_t ksm_zero_pages; #endif /* CONFIG_KSM */ #ifdef CONFIG_LRU_GEN_WALKS_MMU struct { /* this mm_struct is on lru_gen_mm_list */ struct list_head list; /* * Set when switching to this mm_struct, as a hint of * whether it has been used since the last time per-node * page table walkers cleared the corresponding bits. */ unsigned long bitmap; #ifdef CONFIG_MEMCG /* points to the memcg of "owner" above */ struct mem_cgroup *memcg; #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ } __randomize_layout; /* * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. */ unsigned long cpu_bitmap[]; }; #define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ static inline void mm_init_cpumask(struct mm_struct *mm) { unsigned long cpu_bitmap = (unsigned long)mm; cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap); cpumask_clear((struct cpumask *)cpu_bitmap); } /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { return (struct cpumask *)&mm->cpu_bitmap; } #ifdef CONFIG_LRU_GEN struct lru_gen_mm_list { /* mm_struct list for page table walkers */ struct list_head fifo; /* protects the list above */ spinlock_t lock; }; #endif /* CONFIG_LRU_GEN */ #ifdef CONFIG_LRU_GEN_WALKS_MMU void lru_gen_add_mm(struct mm_struct *mm); void lru_gen_del_mm(struct mm_struct *mm); void lru_gen_migrate_mm(struct mm_struct *mm); static inline void lru_gen_init_mm(struct mm_struct *mm) { INIT_LIST_HEAD(&mm->lru_gen.list); mm->lru_gen.bitmap = 0; #ifdef CONFIG_MEMCG mm->lru_gen.memcg = NULL; #endif } static inline void lru_gen_use_mm(struct mm_struct *mm) { /* * When the bitmap is set, page reclaim knows this mm_struct has been * used since the last time it cleared the bitmap. So it might be worth * walking the page tables of this mm_struct to clear the accessed bit. */ WRITE_ONCE(mm->lru_gen.bitmap, -1); } #else /* !CONFIG_LRU_GEN_WALKS_MMU */ static inline void lru_gen_add_mm(struct mm_struct *mm) { } static inline void lru_gen_del_mm(struct mm_struct *mm) { } static inline void lru_gen_migrate_mm(struct mm_struct *mm) { } static inline void lru_gen_init_mm(struct mm_struct *mm) { } static inline void lru_gen_use_mm(struct mm_struct *mm) { } #endif /* CONFIG_LRU_GEN_WALKS_MMU */ struct vma_iterator { struct ma_state mas; }; #define VMA_ITERATOR(name, __mm, __addr) \ struct vma_iterator name = { \ .mas = { \ .tree = &(__mm)->mm_mt, \ .index = __addr, \ .node = NULL, \ .status = ma_start, \ }, \ } static inline void vma_iter_init(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long addr) { mas_init(&vmi->mas, &mm->mm_mt, addr); } #ifdef CONFIG_SCHED_MM_CID enum mm_cid_state { MM_CID_UNSET = -1U, /* Unset state has lazy_put flag set. */ MM_CID_LAZY_PUT = (1U << 31), }; static inline bool mm_cid_is_unset(int cid) { return cid == MM_CID_UNSET; } static inline bool mm_cid_is_lazy_put(int cid) { return !mm_cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT); } static inline bool mm_cid_is_valid(int cid) { return !(cid & MM_CID_LAZY_PUT); } static inline int mm_cid_set_lazy_put(int cid) { return cid | MM_CID_LAZY_PUT; } static inline int mm_cid_clear_lazy_put(int cid) { return cid & ~MM_CID_LAZY_PUT; } /* * mm_cpus_allowed: Union of all mm's threads allowed CPUs. 
*/ static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm) { unsigned long bitmap = (unsigned long)mm; bitmap += offsetof(struct mm_struct, cpu_bitmap); /* Skip cpu_bitmap */ bitmap += cpumask_size(); return (struct cpumask *)bitmap; } /* Accessor for struct mm_struct's cidmask. */ static inline cpumask_t *mm_cidmask(struct mm_struct *mm) { unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm); /* Skip mm_cpus_allowed */ cid_bitmap += cpumask_size(); return (struct cpumask *)cid_bitmap; } static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { int i; for_each_possible_cpu(i) { struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i); pcpu_cid->cid = MM_CID_UNSET; pcpu_cid->recent_cid = MM_CID_UNSET; pcpu_cid->time = 0; } mm->nr_cpus_allowed = p->nr_cpus_allowed; atomic_set(&mm->max_nr_cid, 0); raw_spin_lock_init(&mm->cpus_allowed_lock); cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); cpumask_clear(mm_cidmask(mm)); } static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p) { mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; mm_init_cid(mm, p); return 0; } #define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) static inline void mm_destroy_cid(struct mm_struct *mm) { free_percpu(mm->pcpu_cid); mm->pcpu_cid = NULL; } static inline unsigned int mm_cid_size(void) { return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */ } static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { struct cpumask *mm_allowed = mm_cpus_allowed(mm); if (!mm) return; /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */ raw_spin_lock(&mm->cpus_allowed_lock); cpumask_or(mm_allowed, mm_allowed, cpumask); WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed)); raw_spin_unlock(&mm->cpus_allowed_lock); } #else /* CONFIG_SCHED_MM_CID */ static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { } static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { } static inline unsigned int mm_cid_size(void) { return 0; } static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { } #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); struct vm_fault; /** * typedef vm_fault_t - Return type for page fault handlers. * * Page fault handlers return a bitmask of %VM_FAULT values. */ typedef __bitwise unsigned int vm_fault_t; /** * enum vm_fault_reason - Page fault handlers return a bitmask of * these values to tell the core VM what happened when handling the * fault. Used to decide whether a process gets delivered SIGBUS or * just gets major/minor fault counters bumped up. * * @VM_FAULT_OOM: Out Of Memory * @VM_FAULT_SIGBUS: Bad access * @VM_FAULT_MAJOR: Page read from storage * @VM_FAULT_HWPOISON: Hit poisoned small page * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. 
Index encoded * in upper bits * @VM_FAULT_SIGSEGV: segmentation fault * @VM_FAULT_NOPAGE: ->fault installed the pte, not return page * @VM_FAULT_LOCKED: ->fault locked the returned page * @VM_FAULT_RETRY: ->fault blocked, must retry * @VM_FAULT_FALLBACK: huge page fault failed, fall back to small * @VM_FAULT_DONE_COW: ->fault has fully handled COW * @VM_FAULT_NEEDDSYNC: ->fault did not modify page tables and needs * fsync() to complete (for synchronous page faults * in DAX) * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released * @VM_FAULT_HINDEX_MASK: mask HINDEX value * */ enum vm_fault_reason { VM_FAULT_OOM = (__force vm_fault_t)0x000001, VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100, VM_FAULT_LOCKED = (__force vm_fault_t)0x000200, VM_FAULT_RETRY = (__force vm_fault_t)0x000400, VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16)) #define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \ VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \ VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK) #define VM_FAULT_RESULT_TRACE \ { VM_FAULT_OOM, "OOM" }, \ { VM_FAULT_SIGBUS, "SIGBUS" }, \ { VM_FAULT_MAJOR, "MAJOR" }, \ { VM_FAULT_HWPOISON, "HWPOISON" }, \ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ { VM_FAULT_NOPAGE, "NOPAGE" }, \ { VM_FAULT_LOCKED, "LOCKED" }, \ { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ { VM_FAULT_DONE_COW, "DONE_COW" }, \ { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" }, \ { VM_FAULT_COMPLETED, "COMPLETED" } struct vm_special_mapping { const char *name; /* The name, e.g. "[vdso]". */ /* * If .fault is not provided, this points to a * NULL-terminated array of pages that back the special mapping. * * This must not be NULL unless .fault is provided. */ struct page **pages; /* * If non-NULL, then this is called to resolve page faults * on the special mapping. If used, .pages is not checked. */ vm_fault_t (*fault)(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); void (*close)(const struct vm_special_mapping *sm, struct vm_area_struct *vma); }; enum tlb_flush_reason { TLB_FLUSH_ON_TASK_SWITCH, TLB_REMOTE_SHOOTDOWN, TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. * @FAULT_FLAG_TRIED: The fault has been tried once. * @FAULT_FLAG_USER: The fault originated in userspace. * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. 
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a * COW mapping, making sure that an exclusive anon page is * mapped after the fault. * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. * We should only access orig_pte if this flag set. * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock. * * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify * whether we would allow page faults to retry by specifying these two * fault flags correctly. Currently there can be three legal combinations: * * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and * this is the first try * * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and * we've already tried at least once * * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry * * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never * be used. Note that page faults can be allowed to retry for multiple times, * in which case we'll have an initial fault with flags (a) then later on * continuous faults with flags (b). We should always try to detect pending * signals before a retry to make sure the continuous page faults can still be * interrupted if necessary. * * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal. * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when * applied to mappings that are not COW mappings. */ enum fault_flag { FAULT_FLAG_WRITE = 1 << 0, FAULT_FLAG_MKWRITE = 1 << 1, FAULT_FLAG_ALLOW_RETRY = 1 << 2, FAULT_FLAG_RETRY_NOWAIT = 1 << 3, FAULT_FLAG_KILLABLE = 1 << 4, FAULT_FLAG_TRIED = 1 << 5, FAULT_FLAG_USER = 1 << 6, FAULT_FLAG_REMOTE = 1 << 7, FAULT_FLAG_INSTRUCTION = 1 << 8, FAULT_FLAG_INTERRUPTIBLE = 1 << 9, FAULT_FLAG_UNSHARE = 1 << 10, FAULT_FLAG_ORIG_PTE_VALID = 1 << 11, FAULT_FLAG_VMA_LOCK = 1 << 12, }; typedef unsigned int __bitwise zap_flags_t; /* Flags for clear_young_dirty_ptes(). */ typedef int __bitwise cydp_t; /* Clear the access bit */ #define CYDP_CLEAR_YOUNG ((__force cydp_t)BIT(0)) /* Clear the dirty bit */ #define CYDP_CLEAR_DIRTY ((__force cydp_t)BIT(1)) /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * other. Here is what they mean, and how to use them: * * * FIXME: For pages which are part of a filesystem, mappings are subject to the * lifetime enforced by the filesystem and we need guarantees that longterm * users like RDMA and V4L2 only establish mappings which coordinate usage with * the filesystem. Ideas for this coordination include revoking the longterm * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was * added after the problem with filesystems was found FS DAX VMAs are * specifically failed. Filesystem pages are still subject to bugs and use of * FOLL_LONGTERM should be avoided on those pages. * * In the CMA case: long term pins in a CMA region would unnecessarily fragment * that region. And so, CMA attempts to migrate the page before pinning, when * FOLL_LONGTERM is specified. * * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, * but an additional pin counting system) will be invoked. This is intended for * anything that gets a page reference and then touches page data (for example, * Direct IO). 
This lets the filesystem know that some non-file-system entity is * potentially changing the pages' data. In contrast to FOLL_GET (whose pages * are released via put_page()), FOLL_PIN pages must be released, ultimately, by * a call to unpin_user_page(). * * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different * and separate refcounting mechanisms, however, and that means that each has * its own acquire and release mechanisms: * * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. * * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. * * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based * calls applied to them, and that's perfectly OK. This is a constraint on the * callers, not on the pages.) * * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never * directly by the caller. That's in order to help avoid mismatches when * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * * Please see Documentation/core-api/pin_user_pages.rst for more information. */ enum { /* check pte is writable */ FOLL_WRITE = 1 << 0, /* do get_page on page */ FOLL_GET = 1 << 1, /* give error on hole if it would be zero */ FOLL_DUMP = 1 << 2, /* get_user_pages read/write w/o permission */ FOLL_FORCE = 1 << 3, /* * if a disk transfer is needed, start the IO and return without waiting * upon it */ FOLL_NOWAIT = 1 << 4, /* do not fault in pages */ FOLL_NOFAULT = 1 << 5, /* check page is hwpoisoned */ FOLL_HWPOISON = 1 << 6, /* don't do file mappings */ FOLL_ANON = 1 << 7, /* * FOLL_LONGTERM indicates that the page will be held for an indefinite * time period _often_ under userspace control. This is in contrast to * iov_iter_get_pages(), whose usages are transient. */ FOLL_LONGTERM = 1 << 8, /* split huge pmd before returning */ FOLL_SPLIT_PMD = 1 << 9, /* allow returning PCI P2PDMA pages */ FOLL_PCI_P2PDMA = 1 << 10, /* allow interrupts from generic signals */ FOLL_INTERRUPTIBLE = 1 << 11, /* * Always honor (trigger) NUMA hinting faults. * * FOLL_WRITE implicitly honors NUMA hinting faults because a * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE * apply). get_user_pages_fast_only() always implicitly honors NUMA * hinting faults. 
*/ FOLL_HONOR_NUMA_FAULT = 1 << 12, /* See also internal only FOLL flags in mm/internal.h */ }; /* mm flags */ /* * The first two bits represent core dump modes for set-user-ID, * the modes are SUID_DUMP_* defined in linux/sched/coredump.h */ #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 #define MMF_DUMP_MAPPED_PRIVATE 4 #define MMF_DUMP_MAPPED_SHARED 5 #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 #define MMF_DUMP_DAX_PRIVATE 9 #define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS # define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) #else # define MMF_DUMP_MASK_DEFAULT_ELF 0 #endif /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ /* * This one-shot flag is dropped due to necessity of changing exe once again * on NFS restore */ //#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either * replaced in the future by mm.pinned_vm when it becomes stable, or grow into * a counter on its own. We're aggresive on this bit for now: even if the * pinned pages were unpinned later on, we'll still keep this bit set for the * lifecycle of this mm, just for simplicity. */ #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ #define MMF_HAS_MDWE 28 #define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) #define MMF_HAS_MDWE_NO_INHERIT 29 #define MMF_VM_MERGE_ANY 30 #define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) #define MMF_TOPDOWN 31 /* mm searches top down by default */ #define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) static inline unsigned long mmf_init_flags(unsigned long flags) { if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) flags &= ~((1UL << MMF_HAS_MDWE) | (1UL << MMF_HAS_MDWE_NO_INHERIT)); return flags & MMF_INIT_MASK; } #endif /* _LINUX_MM_TYPES_H */ |
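/*
 * A minimal usage sketch for the @mm_users vs. @mm_count split documented
 * above: mmgrab()/mmdrop() pin only the mm_struct itself, while a real user
 * reference is taken with mmget_not_zero()/mmput(). Assumes the helpers from
 * <linux/sched/mm.h>; the function below is hypothetical and illustrative only.
 */
#include <linux/sched/mm.h>

static void example_borrow_mm(struct mm_struct *mm)
{
	/* Keep the mm_struct allocation alive (@mm_count). */
	mmgrab(mm);

	/*
	 * Take a user reference (@mm_users); this can fail if the address
	 * space is already being torn down.
	 */
	if (mmget_not_zero(mm)) {
		/* ... safe to walk VMAs here, e.g. under mmap_read_lock(mm) ... */
		mmput(mm);	/* drop @mm_users */
	}

	mmdrop(mm);		/* drop @mm_count; may free the mm_struct */
}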
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2020 ARM Ltd.
 */
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H

#ifndef __ASSEMBLY__

/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
{
	asm volatile("rep; nop" ::: "memory");
}

static __always_inline void cpu_relax(void)
{
	rep_nop();
}

struct getcpu_cache;

notrace long __vdso_getcpu(unsigned *cpu, unsigned *node,
			   struct getcpu_cache *unused);

#endif /* __ASSEMBLY__ */

#endif /* __ASM_VDSO_PROCESSOR_H */
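/*
 * A short sketch of the busy-wait pattern the comment above refers to:
 * poll a flag and call cpu_relax() between reads so the PAUSE hint reaches
 * the CPU. READ_ONCE() is assumed from <linux/compiler.h>; the function and
 * flag are hypothetical.
 */
static __always_inline void example_spin_until_set(int *flag)
{
	while (!READ_ONCE(*flag))
		cpu_relax();	/* expands to rep_nop() on this architecture */
}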
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __KVM_X86_VMX_HYPERV_H
#define __KVM_X86_VMX_HYPERV_H

#include <linux/kvm_host.h>

#include "vmcs12.h"
#include "vmx.h"

#define EVMPTR_INVALID (-1ULL)
#define EVMPTR_MAP_PENDING (-2ULL)

enum nested_evmptrld_status {
	EVMPTRLD_DISABLED,
	EVMPTRLD_SUCCEEDED,
	EVMPTRLD_VMFAIL,
	EVMPTRLD_ERROR,
};

#ifdef CONFIG_KVM_HYPERV
static inline bool evmptr_is_valid(u64 evmptr)
{
	return evmptr != EVMPTR_INVALID && evmptr != EVMPTR_MAP_PENDING;
}

static inline bool nested_vmx_is_evmptr12_valid(struct vcpu_vmx *vmx)
{
	return evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
}

static inline bool evmptr_is_set(u64 evmptr)
{
	return evmptr != EVMPTR_INVALID;
}

static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx)
{
	return evmptr_is_set(vmx->nested.hv_evmcs_vmptr);
}

static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
{
	return vmx->nested.hv_evmcs;
}

static inline bool guest_cpu_cap_has_evmcs(struct kvm_vcpu *vcpu)
{
	/*
	 * eVMCS is exposed to the guest if Hyper-V is enabled in CPUID and
	 * eVMCS has been explicitly enabled by userspace.
	 */
	return vcpu->arch.hyperv_enabled &&
	       to_vmx(vcpu)->nested.enlightened_vmcs_enabled;
}

u64 nested_get_evmptr(struct kvm_vcpu *vcpu);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version);
void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu);
void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
#else
static inline bool evmptr_is_valid(u64 evmptr)
{
	return false;
}

static inline bool nested_vmx_is_evmptr12_valid(struct vcpu_vmx *vmx)
{
	return false;
}

static inline bool evmptr_is_set(u64 evmptr)
{
	return false;
}

static inline bool nested_vmx_is_evmptr12_set(struct vcpu_vmx *vmx)
{
	return false;
}

static inline struct hv_enlightened_vmcs *nested_vmx_evmcs(struct vcpu_vmx *vmx)
{
	return NULL;
}
#endif

#endif /* __KVM_X86_VMX_HYPERV_H */
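/*
 * A hypothetical caller-side sketch: before dereferencing the enlightened
 * VMCS, callers are expected to check that the eVMCS pointer is valid, i.e.
 * neither EVMPTR_INVALID nor EVMPTR_MAP_PENDING. The helper name and the use
 * of the hv_clean_fields member are assumptions for illustration only.
 */
static inline bool example_evmcs_has_clean_fields(struct vcpu_vmx *vmx)
{
	struct hv_enlightened_vmcs *evmcs;

	if (!nested_vmx_is_evmptr12_valid(vmx))
		return false;	/* not set, or mapping still pending */

	evmcs = nested_vmx_evmcs(vmx);
	return evmcs && evmcs->hv_clean_fields != 0;
}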
2 10 1 3 6 6 1 1 39 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * User-mode machine state access * * Copyright (C) 2007 Red Hat, Inc. All rights reserved. * * Red Hat Author: Roland McGrath. */ #ifndef _LINUX_REGSET_H #define _LINUX_REGSET_H 1 #include <linux/compiler.h> #include <linux/types.h> #include <linux/bug.h> #include <linux/uaccess.h> struct task_struct; struct user_regset; struct membuf { void *p; size_t left; }; static inline int membuf_zero(struct membuf *s, size_t size) { if (s->left) { if (size > s->left) size = s->left; memset(s->p, 0, size); s->p += size; s->left -= size; } return s->left; } static inline int membuf_write(struct membuf *s, const void *v, size_t size) { if (s->left) { if (size > s->left) size = s->left; memcpy(s->p, v, size); s->p += size; s->left -= size; } return s->left; } static inline struct membuf membuf_at(const struct membuf *s, size_t offs) { struct membuf n = *s; if (offs > n.left) offs = n.left; n.p += offs; n.left -= offs; return n; } /* current s->p must be aligned for v; v must be a scalar */ #define membuf_store(s, v) \ ({ \ struct membuf *__s = (s); \ if (__s->left) { \ typeof(v) __v = (v); \ size_t __size = sizeof(__v); \ if (unlikely(__size > __s->left)) { \ __size = __s->left; \ memcpy(__s->p, &__v, __size); \ } else { \ *(typeof(__v + 0) *)__s->p = __v; \ } \ __s->p += __size; \ __s->left -= __size; \ } \ __s->left;}) /** * user_regset_active_fn - type of @active function in &struct user_regset * @target: thread being examined * @regset: regset being examined * * Return -%ENODEV if not available on the hardware found. * Return %0 if no interesting state in this thread. * Return >%0 number of @size units of interesting state. * Any get call fetching state beyond that number will * see the default initialization state for this data, * so a caller that knows what the default state is need * not copy it all out. * This call is optional; the pointer is %NULL if there * is no inexpensive check to yield a value < @n. 
*/ typedef int user_regset_active_fn(struct task_struct *target, const struct user_regset *regset); typedef int user_regset_get2_fn(struct task_struct *target, const struct user_regset *regset, struct membuf to); /** * user_regset_set_fn - type of @set function in &struct user_regset * @target: thread being examined * @regset: regset being examined * @pos: offset into the regset data to access, in bytes * @count: amount of data to copy, in bytes * @kbuf: if not %NULL, a kernel-space pointer to copy from * @ubuf: if @kbuf is %NULL, a user-space pointer to copy from * * Store register values. Return %0 on success; -%EIO or -%ENODEV * are usual failure returns. The @pos and @count values are in * bytes, but must be properly aligned. If @kbuf is non-null, that * buffer is used and @ubuf is ignored. If @kbuf is %NULL, then * ubuf gives a userland pointer to access directly, and an -%EFAULT * return value is possible. */ typedef int user_regset_set_fn(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf); /** * user_regset_writeback_fn - type of @writeback function in &struct user_regset * @target: thread being examined * @regset: regset being examined * @immediate: zero if writeback at completion of next context switch is OK * * This call is optional; usually the pointer is %NULL. When * provided, there is some user memory associated with this regset's * hardware, such as memory backing cached register data on register * window machines; the regset's data controls what user memory is * used (e.g. via the stack pointer value). * * Write register data back to user memory. If the @immediate flag * is nonzero, it must be written to the user memory so uaccess or * access_process_vm() can see it when this call returns; if zero, * then it must be written back by the time the task completes a * context switch (as synchronized with wait_task_inactive()). * Return %0 on success or if there was nothing to do, -%EFAULT for * a memory problem (bad stack pointer or whatever), or -%EIO for a * hardware problem. */ typedef int user_regset_writeback_fn(struct task_struct *target, const struct user_regset *regset, int immediate); /** * struct user_regset - accessible thread CPU state * @n: Number of slots (registers). * @size: Size in bytes of a slot (register). * @align: Required alignment, in bytes. * @bias: Bias from natural indexing. * @core_note_type: ELF note @n_type value used in core dumps. * @get: Function to fetch values. * @set: Function to store values. * @active: Function to report if regset is active, or %NULL. * @writeback: Function to write data back to user memory, or %NULL. * * This data structure describes a machine resource we call a register set. * This is part of the state of an individual thread, not necessarily * actual CPU registers per se. A register set consists of a number of * similar slots, given by @n. Each slot is @size bytes, and aligned to * @align bytes (which is at least @size). For dynamically-sized * regsets, @n must contain the maximum possible number of slots for the * regset. * * For backward compatibility, the @get and @set methods must pad to, or * accept, @n * @size bytes, even if the current regset size is smaller. * The precise semantics of these operations depend on the regset being * accessed. 
* * The functions to which &struct user_regset members point must be * called only on the current thread or on a thread that is in * %TASK_STOPPED or %TASK_TRACED state, that we are guaranteed will not * be woken up and return to user mode, and that we have called * wait_task_inactive() on. (The target thread always might wake up for * SIGKILL while these functions are working, in which case that * thread's user_regset state might be scrambled.) * * The @pos argument must be aligned according to @align; the @count * argument must be a multiple of @size. These functions are not * responsible for checking for invalid arguments. * * When there is a natural value to use as an index, @bias gives the * difference between the natural index and the slot index for the * register set. For example, x86 GDT segment descriptors form a regset; * the segment selector produces a natural index, but only a subset of * that index space is available as a regset (the TLS slots); subtracting * @bias from a segment selector index value computes the regset slot. * * If nonzero, @core_note_type gives the n_type field (NT_* value) * of the core file note in which this regset's data appears. * NT_PRSTATUS is a special case in that the regset data starts at * offsetof(struct elf_prstatus, pr_reg) into the note data; that is * part of the per-machine ELF formats userland knows about. In * other cases, the core file note contains exactly the whole regset * (@n * @size) and nothing else. The core file note is normally * omitted when there is an @active function and it returns zero. */ struct user_regset { user_regset_get2_fn *regset_get; user_regset_set_fn *set; user_regset_active_fn *active; user_regset_writeback_fn *writeback; unsigned int n; unsigned int size; unsigned int align; unsigned int bias; unsigned int core_note_type; }; /** * struct user_regset_view - available regsets * @name: Identifier, e.g. UTS_MACHINE string. * @regsets: Array of @n regsets available in this view. * @n: Number of elements in @regsets. * @e_machine: ELF header @e_machine %EM_* value written in core dumps. * @e_flags: ELF header @e_flags value written in core dumps. * @ei_osabi: ELF header @e_ident[%EI_OSABI] value written in core dumps. * * A regset view is a collection of regsets (&struct user_regset, * above). This describes all the state of a thread that can be seen * from a given architecture/ABI environment. More than one view might * refer to the same &struct user_regset, or more than one regset * might refer to the same machine-specific state in the thread. For * example, a 32-bit thread's state could be examined from the 32-bit * view or from the 64-bit view. Either method reaches the same thread * register state, doing appropriate widening or truncation. */ struct user_regset_view { const char *name; const struct user_regset *regsets; unsigned int n; u32 e_flags; u16 e_machine; u8 ei_osabi; }; /* * This is documented here rather than at the definition sites because its * implementation is machine-dependent but its interface is universal. */ /** * task_user_regset_view - Return the process's native regset view. * @tsk: a thread of the process in question * * Return the &struct user_regset_view that is native for the given process. * For example, what it would access when it called ptrace(). * Throughout the life of the process, this only changes at exec. 
*/ const struct user_regset_view *task_user_regset_view(struct task_struct *tsk); static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, const void **kbuf, const void __user **ubuf, void *data, const int start_pos, const int end_pos) { if (*count == 0) return 0; BUG_ON(*pos < start_pos); if (end_pos < 0 || *pos < end_pos) { unsigned int copy = (end_pos < 0 ? *count : min(*count, end_pos - *pos)); data += *pos - start_pos; if (*kbuf) { memcpy(data, *kbuf, copy); *kbuf += copy; } else if (__copy_from_user(data, *ubuf, copy)) return -EFAULT; else *ubuf += copy; *pos += copy; *count -= copy; } return 0; } static inline void user_regset_copyin_ignore(unsigned int *pos, unsigned int *count, const void **kbuf, const void __user **ubuf, const int start_pos, const int end_pos) { if (*count == 0) return; BUG_ON(*pos < start_pos); if (end_pos < 0 || *pos < end_pos) { unsigned int copy = (end_pos < 0 ? *count : min(*count, end_pos - *pos)); if (*kbuf) *kbuf += copy; else *ubuf += copy; *pos += copy; *count -= copy; } } extern int regset_get(struct task_struct *target, const struct user_regset *regset, unsigned int size, void *data); extern int regset_get_alloc(struct task_struct *target, const struct user_regset *regset, unsigned int size, void **data); extern int copy_regset_to_user(struct task_struct *target, const struct user_regset_view *view, unsigned int setno, unsigned int offset, unsigned int size, void __user *data); /** * copy_regset_from_user - store into thread's user_regset data from user memory * @target: thread to be examined * @view: &struct user_regset_view describing user thread machine state * @setno: index in @view->regsets * @offset: offset into the regset data, in bytes * @size: amount of data to copy, in bytes * @data: user-mode pointer to copy from */ static inline int copy_regset_from_user(struct task_struct *target, const struct user_regset_view *view, unsigned int setno, unsigned int offset, unsigned int size, const void __user *data) { const struct user_regset *regset = &view->regsets[setno]; if (!regset->set) return -EOPNOTSUPP; if (!access_ok(data, size)) return -EFAULT; return regset->set(target, regset, offset, size, NULL, data); } #endif /* <linux/regset.h> */ |
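/*
 * A minimal sketch of a ->regset_get() handler built on the membuf helpers
 * above: write whatever live state exists, then pad the rest of the regset
 * with zeroes so exactly @n * @size bytes are produced. The regset and the
 * use of target->thread as the backing state are hypothetical; only the
 * membuf_write()/membuf_zero() contract is taken from this header.
 */
static int example_regset_get(struct task_struct *target,
			      const struct user_regset *regset,
			      struct membuf to)
{
	/* Copy the architecture-specific register block first ... */
	membuf_write(&to, &target->thread, sizeof(target->thread));

	/* ... then zero-fill whatever space remains in the destination. */
	return membuf_zero(&to, to.left);
}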
3 8 3 7 3 3 5 2 3 3 11 11 90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | // SPDX-License-Identifier: GPL-2.0-or-later /* * SR-IPv6 implementation -- HMAC functions * * Author: * David Lebrun <david.lebrun@uclouvain.be> */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/xfrm.h> #include <crypto/hash.h> #include <net/seg6.h> #include <net/genetlink.h> #include <net/seg6_hmac.h> #include <linux/random.h> static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring); static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { const struct seg6_hmac_info *hinfo = obj; return (hinfo->hmackeyid != *(__u32 *)arg->key); } static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) { kfree_rcu(hinfo, rcu); } static void seg6_free_hi(void *ptr, void *arg) { struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; if (hinfo) seg6_hinfo_release(hinfo); } static const struct rhashtable_params rht_params = { .head_offset = offsetof(struct seg6_hmac_info, node), .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), .key_len = sizeof(u32), .automatic_shrinking = true, .obj_cmpfn = seg6_hmac_cmpfn, }; static struct seg6_hmac_algo hmac_algos[] = { { .alg_id = SEG6_HMAC_ALGO_SHA1, .name = "hmac(sha1)", }, { .alg_id = SEG6_HMAC_ALGO_SHA256, .name = "hmac(sha256)", }, }; static struct 
sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) { struct sr6_tlv_hmac *tlv; if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) return NULL; if (!sr_has_hmac(srh)) return NULL; tlv = (struct sr6_tlv_hmac *) ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) return NULL; return tlv; } static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) { struct seg6_hmac_algo *algo; int i, alg_count; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; if (algo->alg_id == alg_id) return algo; } return NULL; } static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, u8 *output, int outlen) { struct seg6_hmac_algo *algo; struct crypto_shash *tfm; struct shash_desc *shash; int ret, dgsize; algo = __hmac_get_algo(hinfo->alg_id); if (!algo) return -ENOENT; tfm = *this_cpu_ptr(algo->tfms); dgsize = crypto_shash_digestsize(tfm); if (dgsize > outlen) { pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", dgsize, outlen); return -ENOMEM; } ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); if (ret < 0) { pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); goto failed; } shash = *this_cpu_ptr(algo->shashs); shash->tfm = tfm; ret = crypto_shash_digest(shash, text, psize, output); if (ret < 0) { pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); goto failed; } return dgsize; failed: return ret; } int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, u8 *output) { __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; int plen, i, dgsize, wrsize; char *ring, *off; /* a 160-byte buffer for digest output allows to store highest known * hash function (RadioGatun) with up to 1216 bits */ /* saddr(16) + first_seg(1) + flags(1) + keyid(4) + seglist(16n) */ plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; /* this limit allows for 14 segments */ if (plen >= SEG6_HMAC_RING_SIZE) return -EMSGSIZE; /* Let's build the HMAC text on the ring buffer. The text is composed * as follows, in order: * * 1. Source IPv6 address (128 bits) * 2. first_segment value (8 bits) * 3. Flags (8 bits) * 4. HMAC Key ID (32 bits) * 5. All segments in the segments list (n * 128 bits) */ local_bh_disable(); ring = this_cpu_ptr(hmac_ring); off = ring; /* source address */ memcpy(off, saddr, 16); off += 16; /* first_segment value */ *off++ = hdr->first_segment; /* flags */ *off++ = hdr->flags; /* HMAC Key ID */ memcpy(off, &hmackeyid, 4); off += 4; /* all segments in the list */ for (i = 0; i < hdr->first_segment + 1; i++) { memcpy(off, hdr->segments + i, 16); off += 16; } dgsize = __do_hmac(hinfo, ring, plen, tmp_out, SEG6_HMAC_MAX_DIGESTSIZE); local_bh_enable(); if (dgsize < 0) return dgsize; wrsize = SEG6_HMAC_FIELD_LEN; if (wrsize > dgsize) wrsize = dgsize; memset(output, 0, SEG6_HMAC_FIELD_LEN); memcpy(output, tmp_out, wrsize); return 0; } EXPORT_SYMBOL(seg6_hmac_compute); /* checks if an incoming SR-enabled packet's HMAC status matches * the incoming policy. 
* * called with rcu_read_lock() */ bool seg6_hmac_validate_skb(struct sk_buff *skb) { u8 hmac_output[SEG6_HMAC_FIELD_LEN]; struct net *net = dev_net(skb->dev); struct seg6_hmac_info *hinfo; struct sr6_tlv_hmac *tlv; struct ipv6_sr_hdr *srh; struct inet6_dev *idev; int require_hmac; idev = __in6_dev_get(skb->dev); srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); tlv = seg6_get_tlv_hmac(srh); require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac); /* mandatory check but no tlv */ if (require_hmac > 0 && !tlv) return false; /* no check */ if (require_hmac < 0) return true; /* check only if present */ if (require_hmac == 0 && !tlv) return true; /* now, seg6_require_hmac >= 0 && tlv */ hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); if (!hinfo) return false; if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) return false; if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) return false; return true; } EXPORT_SYMBOL(seg6_hmac_validate_skb); /* called with rcu_read_lock() */ struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) { struct seg6_pernet_data *sdata = seg6_pernet(net); struct seg6_hmac_info *hinfo; hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); return hinfo; } EXPORT_SYMBOL(seg6_hmac_info_lookup); int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) { struct seg6_pernet_data *sdata = seg6_pernet(net); int err; err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, rht_params); return err; } EXPORT_SYMBOL(seg6_hmac_info_add); int seg6_hmac_info_del(struct net *net, u32 key) { struct seg6_pernet_data *sdata = seg6_pernet(net); struct seg6_hmac_info *hinfo; int err = -ENOENT; hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); if (!hinfo) goto out; err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node, rht_params); if (err) goto out; seg6_hinfo_release(hinfo); out: return err; } EXPORT_SYMBOL(seg6_hmac_info_del); int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh) { struct seg6_hmac_info *hinfo; struct sr6_tlv_hmac *tlv; int err = -ENOENT; tlv = seg6_get_tlv_hmac(srh); if (!tlv) return -EINVAL; rcu_read_lock(); hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); if (!hinfo) goto out; memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); out: rcu_read_unlock(); return err; } EXPORT_SYMBOL(seg6_push_hmac); static int seg6_hmac_init_algo(void) { struct seg6_hmac_algo *algo; struct crypto_shash *tfm; struct shash_desc *shash; int i, alg_count, cpu; int ret = -ENOMEM; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { struct crypto_shash **p_tfm; int shsize; algo = &hmac_algos[i]; algo->tfms = alloc_percpu(struct crypto_shash *); if (!algo->tfms) goto error_out; for_each_possible_cpu(cpu) { tfm = crypto_alloc_shash(algo->name, 0, 0); if (IS_ERR(tfm)) { ret = PTR_ERR(tfm); goto error_out; } p_tfm = per_cpu_ptr(algo->tfms, cpu); *p_tfm = tfm; } p_tfm = raw_cpu_ptr(algo->tfms); tfm = *p_tfm; shsize = sizeof(*shash) + crypto_shash_descsize(tfm); algo->shashs = alloc_percpu(struct shash_desc *); if (!algo->shashs) goto error_out; for_each_possible_cpu(cpu) { shash = kzalloc_node(shsize, GFP_KERNEL, cpu_to_node(cpu)); if (!shash) goto error_out; *per_cpu_ptr(algo->shashs, cpu) = shash; } } return 0; error_out: seg6_hmac_exit(); return ret; } int __init seg6_hmac_init(void) { return seg6_hmac_init_algo(); } int __net_init 
seg6_hmac_net_init(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); return rhashtable_init(&sdata->hmac_infos, &rht_params); } void seg6_hmac_exit(void) { struct seg6_hmac_algo *algo = NULL; struct crypto_shash *tfm; struct shash_desc *shash; int i, alg_count, cpu; alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; if (algo->shashs) { for_each_possible_cpu(cpu) { shash = *per_cpu_ptr(algo->shashs, cpu); kfree(shash); } free_percpu(algo->shashs); } if (algo->tfms) { for_each_possible_cpu(cpu) { tfm = *per_cpu_ptr(algo->tfms, cpu); crypto_free_shash(tfm); } free_percpu(algo->tfms); } } } EXPORT_SYMBOL(seg6_hmac_exit); void __net_exit seg6_hmac_net_exit(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); } EXPORT_SYMBOL(seg6_hmac_net_exit);
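/*
 * Usage sketch (illustrative only, not part of the SR-IPv6 HMAC code above):
 * how an in-kernel caller could install an HMAC key and then sign an SRH
 * with the helpers above.  The key id (42), the secret and the choice of
 * SEG6_HMAC_ALGO_SHA256 are arbitrary, and the SRH is assumed to already
 * carry an HMAC TLV referencing key id 42, since seg6_push_hmac() only
 * fills in the hmac field of an existing TLV.
 */
static int example_seg6_sign_srh(struct net *net, struct in6_addr *saddr,
				 struct ipv6_sr_hdr *srh)
{
	struct seg6_hmac_info *hinfo;
	int err;

	hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL);
	if (!hinfo)
		return -ENOMEM;

	hinfo->hmackeyid = 42;
	hinfo->alg_id = SEG6_HMAC_ALGO_SHA256;
	hinfo->slen = 6;
	memcpy(hinfo->secret, "secret", hinfo->slen);

	/* make the key visible to seg6_hmac_info_lookup() */
	err = seg6_hmac_info_add(net, hinfo->hmackeyid, hinfo);
	if (err) {
		kfree(hinfo);
		return err;
	}

	/* compute the HMAC over saddr/first_segment/flags/keyid/segments
	 * and write it into the SRH's HMAC TLV
	 */
	return seg6_push_hmac(net, saddr, srh);
}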
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM dma #if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_DMA_H #include <linux/tracepoint.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <trace/events/mmflags.h> TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); TRACE_DEFINE_ENUM(DMA_TO_DEVICE); TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); TRACE_DEFINE_ENUM(DMA_NONE); #define decode_dma_data_direction(dir) \ __print_symbolic(dir, \ { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ { DMA_TO_DEVICE, "TO_DEVICE" }, \ { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ { DMA_NONE, "NONE" }) #define decode_dma_attrs(attrs) \ __print_flags(attrs, "|", \ { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ { DMA_ATTR_NO_WARN, "NO_WARN" }, \ { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) DECLARE_EVENT_CLASS(dma_map, TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(u64, phys_addr) __field(u64, dma_addr) __field(size_t, size) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( __assign_str(device); __entry->phys_addr = phys_addr; __entry->dma_addr = dma_addr; __entry->size = size; __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", __get_str(device),
decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->phys_addr, decode_dma_attrs(__entry->attrs)) ); #define DEFINE_MAP_EVENT(name) \ DEFINE_EVENT(dma_map, name, \ TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, \ size_t size, enum dma_data_direction dir, unsigned long attrs), \ TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)) DEFINE_MAP_EVENT(dma_map_page); DEFINE_MAP_EVENT(dma_map_resource); DECLARE_EVENT_CLASS(dma_unmap, TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, addr, size, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(u64, addr) __field(size_t, size) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( __assign_str(device); __entry->addr = addr; __entry->size = size; __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __entry->addr, __entry->size, decode_dma_attrs(__entry->attrs)) ); #define DEFINE_UNMAP_EVENT(name) \ DEFINE_EVENT(dma_unmap, name, \ TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, \ enum dma_data_direction dir, unsigned long attrs), \ TP_ARGS(dev, addr, size, dir, attrs)) DEFINE_UNMAP_EVENT(dma_unmap_page); DEFINE_UNMAP_EVENT(dma_unmap_resource); DECLARE_EVENT_CLASS(dma_alloc_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, gfp_t flags, unsigned long attrs), TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) __field(gfp_t, flags) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( __assign_str(device); __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; __entry->flags = flags; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, show_gfp_flags(__entry->flags), decode_dma_attrs(__entry->attrs)) ); #define DEFINE_ALLOC_EVENT(name) \ DEFINE_EVENT(dma_alloc_class, name, \ TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ size_t size, enum dma_data_direction dir, gfp_t flags, \ unsigned long attrs), \ TP_ARGS(dev, virt_addr, dma_addr, size, dir, flags, attrs)) DEFINE_ALLOC_EVENT(dma_alloc); DEFINE_ALLOC_EVENT(dma_alloc_pages); DEFINE_ALLOC_EVENT(dma_alloc_sgt_err); TRACE_EVENT(dma_alloc_sgt, TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, enum dma_data_direction dir, gfp_t flags, unsigned long attrs), TP_ARGS(dev, sgt, size, dir, flags, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, phys_addrs, sgt->orig_nents) __field(u64, dma_addr) __field(size_t, size) __field(enum dma_data_direction, dir) __field(gfp_t, flags) __field(unsigned long, attrs) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); __entry->dma_addr = sg_dma_address(sgt->sgl); __entry->size = size; __entry->dir = dir; __entry->flags = flags; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s flags=%s attrs=%s", __get_str(device), 
decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __print_array(__get_dynamic_array(phys_addrs), __get_dynamic_array_len(phys_addrs) / sizeof(u64), sizeof(u64)), show_gfp_flags(__entry->flags), decode_dma_attrs(__entry->attrs)) ); DECLARE_EVENT_CLASS(dma_free_class, TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(void *, virt_addr) __field(u64, dma_addr) __field(size_t, size) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( __assign_str(device); __entry->virt_addr = virt_addr; __entry->dma_addr = dma_addr; __entry->size = size; __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __entry->virt_addr, decode_dma_attrs(__entry->attrs)) ); #define DEFINE_FREE_EVENT(name) \ DEFINE_EVENT(dma_free_class, name, \ TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, \ size_t size, enum dma_data_direction dir, unsigned long attrs), \ TP_ARGS(dev, virt_addr, dma_addr, size, dir, attrs)) DEFINE_FREE_EVENT(dma_free); DEFINE_FREE_EVENT(dma_free_pages); TRACE_EVENT(dma_free_sgt, TP_PROTO(struct device *dev, struct sg_table *sgt, size_t size, enum dma_data_direction dir), TP_ARGS(dev, sgt, size, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, phys_addrs, sgt->orig_nents) __field(u64, dma_addr) __field(size_t, size) __field(enum dma_data_direction, dir) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); __entry->dma_addr = sg_dma_address(sgt->sgl); __entry->size = size; __entry->dir = dir; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size, __print_array(__get_dynamic_array(phys_addrs), __get_dynamic_array_len(phys_addrs) / sizeof(u64), sizeof(u64))) ); TRACE_EVENT(dma_map_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int ents, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, sgl, nents, ents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, phys_addrs, nents) __dynamic_array(u64, dma_addrs, ents) __dynamic_array(unsigned int, lengths, ents) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgl, sg, nents, i) ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); for_each_sg(sgl, sg, ents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = sg_dma_len(sg); } __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __print_array(__get_dynamic_array(dma_addrs), __get_dynamic_array_len(dma_addrs) / sizeof(u64), sizeof(u64)), __print_array(__get_dynamic_array(lengths), __get_dynamic_array_len(lengths) / sizeof(unsigned int), sizeof(unsigned int)), __print_array(__get_dynamic_array(phys_addrs), __get_dynamic_array_len(phys_addrs) / sizeof(u64), sizeof(u64)), 
decode_dma_attrs(__entry->attrs)) ); TRACE_EVENT(dma_map_sg_err, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, int err, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, sgl, nents, err, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, phys_addrs, nents) __field(int, err) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgl, sg, nents, i) ((u64 *)__get_dynamic_array(phys_addrs))[i] = sg_phys(sg); __entry->err = err; __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s dma_addrs=%s err=%d attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __print_array(__get_dynamic_array(phys_addrs), __get_dynamic_array_len(phys_addrs) / sizeof(u64), sizeof(u64)), __entry->err, decode_dma_attrs(__entry->attrs)) ); TRACE_EVENT(dma_unmap_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs), TP_ARGS(dev, sgl, nents, dir, attrs), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, addrs, nents) __field(enum dma_data_direction, dir) __field(unsigned long, attrs) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgl, sg, nents, i) ((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(sg); __entry->dir = dir; __entry->attrs = attrs; ), TP_printk("%s dir=%s phys_addrs=%s attrs=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __print_array(__get_dynamic_array(addrs), __get_dynamic_array_len(addrs) / sizeof(u64), sizeof(u64)), decode_dma_attrs(__entry->attrs)) ); DECLARE_EVENT_CLASS(dma_sync_single, TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir), TP_ARGS(dev, dma_addr, size, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) __field(u64, dma_addr) __field(size_t, size) __field(enum dma_data_direction, dir) ), TP_fast_assign( __assign_str(device); __entry->dma_addr = dma_addr; __entry->size = size; __entry->dir = dir; ), TP_printk("%s dir=%s dma_addr=%llx size=%zu", __get_str(device), decode_dma_data_direction(__entry->dir), __entry->dma_addr, __entry->size) ); #define DEFINE_SYNC_SINGLE_EVENT(name) \ DEFINE_EVENT(dma_sync_single, name, \ TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, \ enum dma_data_direction dir), \ TP_ARGS(dev, dma_addr, size, dir)) DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_cpu); DEFINE_SYNC_SINGLE_EVENT(dma_sync_single_for_device); DECLARE_EVENT_CLASS(dma_sync_sg, TP_PROTO(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir), TP_ARGS(dev, sgl, nents, dir), TP_STRUCT__entry( __string(device, dev_name(dev)) __dynamic_array(u64, dma_addrs, nents) __dynamic_array(unsigned int, lengths, nents) __field(enum dma_data_direction, dir) ), TP_fast_assign( struct scatterlist *sg; int i; __assign_str(device); for_each_sg(sgl, sg, nents, i) { ((u64 *)__get_dynamic_array(dma_addrs))[i] = sg_dma_address(sg); ((unsigned int *)__get_dynamic_array(lengths))[i] = sg_dma_len(sg); } __entry->dir = dir; ), TP_printk("%s dir=%s dma_addrs=%s sizes=%s", __get_str(device), decode_dma_data_direction(__entry->dir), __print_array(__get_dynamic_array(dma_addrs), __get_dynamic_array_len(dma_addrs) / sizeof(u64), sizeof(u64)), __print_array(__get_dynamic_array(lengths), __get_dynamic_array_len(lengths) / sizeof(unsigned int), sizeof(unsigned int))) ); #define DEFINE_SYNC_SG_EVENT(name) \ 
DEFINE_EVENT(dma_sync_sg, name, \ TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, \ enum dma_data_direction dir), \ TP_ARGS(dev, sg, nents, dir)) DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_cpu); DEFINE_SYNC_SG_EVENT(dma_sync_sg_for_device); #endif /* _TRACE_DMA_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
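/*
 * Usage sketch (illustrative only, not part of the trace header above): each
 * DEFINE_*_EVENT() above generates a trace_<name>() helper once the header
 * is included.  A mapping path could emit the dma_map_page event roughly as
 * below; example_map_and_trace() and its parameters are hypothetical, only
 * the tracepoint call mirrors the event definition above.
 */
static dma_addr_t example_map_and_trace(struct device *dev, struct page *page,
					size_t size,
					enum dma_data_direction dir,
					unsigned long attrs)
{
	dma_addr_t dma_addr = dma_map_page_attrs(dev, page, 0, size, dir, attrs);

	/* report the mapping only if it actually succeeded */
	if (!dma_mapping_error(dev, dma_addr))
		trace_dma_map_page(dev, page_to_phys(page), dma_addr, size,
				   dir, attrs);
	return dma_addr;
}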
// SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/mmu_notifier.c * * Copyright (C) 2008 Qumranet, Inc. * Copyright (C) 2008 SGI * Christoph Lameter <cl@linux.com> */ #include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/interval_tree.h> #include <linux/srcu.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/slab.h> #include "vma.h" /* global SRCU for all MMs */ DEFINE_STATIC_SRCU(srcu); #ifdef CONFIG_LOCKDEP struct lockdep_map __mmu_notifier_invalidate_range_start_map = { .name = "mmu_notifier_invalidate_range_start" }; #endif /* * The mmu_notifier_subscriptions structure is allocated and installed in * mm->notifier_subscriptions inside the mm_take_all_locks() protected * critical section and it's released only when mm_count reaches zero * in mmdrop(). */ struct mmu_notifier_subscriptions { /* all mmu notifiers registered in this mm are queued in this list */ struct hlist_head list; bool has_itree; /* to serialize the list modifications and hlist_unhashed */ spinlock_t lock; unsigned long invalidate_seq; unsigned long active_invalidate_ranges; struct rb_root_cached itree; wait_queue_head_t wq; struct hlist_head deferred_list; }; /* * This is a collision-retry read-side/write-side 'lock', a lot like a * seqcount, however this allows multiple write-sides to hold it at * once. Conceptually the write side is protecting the values of the PTEs in * this mm, such that PTEs cannot be read into SPTEs (shadow PTEs) while any * writer exists. * * Note that the core mm creates nested invalidate_range_start()/end() regions * within the same thread, and runs invalidate_range_start()/end() in parallel * on multiple CPUs. This is designed to not reduce concurrency or block * progress on the mm side. * * As a secondary function, holding the full write side also serves to prevent * writers for the itree, this is an optimization to avoid extra locking * during invalidate_range_start/end notifiers.
* * The write side has two states, fully excluded: * - mm->active_invalidate_ranges != 0 * - subscriptions->invalidate_seq & 1 == True (odd) * - some range on the mm_struct is being invalidated * - the itree is not allowed to change * * And partially excluded: * - mm->active_invalidate_ranges != 0 * - subscriptions->invalidate_seq & 1 == False (even) * - some range on the mm_struct is being invalidated * - the itree is allowed to change * * Operations on notifier_subscriptions->invalidate_seq (under spinlock): * seq |= 1 # Begin writing * seq++ # Release the writing state * seq & 1 # True if a writer exists * * The later state avoids some expensive work on inv_end in the common case of * no mmu_interval_notifier monitoring the VA. */ static bool mn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions) { lockdep_assert_held(&subscriptions->lock); return subscriptions->invalidate_seq & 1; } static struct mmu_interval_notifier * mn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions, const struct mmu_notifier_range *range, unsigned long *seq) { struct interval_tree_node *node; struct mmu_interval_notifier *res = NULL; spin_lock(&subscriptions->lock); subscriptions->active_invalidate_ranges++; node = interval_tree_iter_first(&subscriptions->itree, range->start, range->end - 1); if (node) { subscriptions->invalidate_seq |= 1; res = container_of(node, struct mmu_interval_notifier, interval_tree); } *seq = subscriptions->invalidate_seq; spin_unlock(&subscriptions->lock); return res; } static struct mmu_interval_notifier * mn_itree_inv_next(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range) { struct interval_tree_node *node; node = interval_tree_iter_next(&interval_sub->interval_tree, range->start, range->end - 1); if (!node) return NULL; return container_of(node, struct mmu_interval_notifier, interval_tree); } static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) { struct mmu_interval_notifier *interval_sub; struct hlist_node *next; spin_lock(&subscriptions->lock); if (--subscriptions->active_invalidate_ranges || !mn_itree_is_invalidating(subscriptions)) { spin_unlock(&subscriptions->lock); return; } /* Make invalidate_seq even */ subscriptions->invalidate_seq++; /* * The inv_end incorporates a deferred mechanism like rtnl_unlock(). * Adds and removes are queued until the final inv_end happens then * they are progressed. This arrangement for tree updates is used to * avoid using a blocking lock during invalidate_range_start. */ hlist_for_each_entry_safe(interval_sub, next, &subscriptions->deferred_list, deferred_item) { if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); else interval_tree_remove(&interval_sub->interval_tree, &subscriptions->itree); hlist_del(&interval_sub->deferred_item); } spin_unlock(&subscriptions->lock); wake_up_all(&subscriptions->wq); } /** * mmu_interval_read_begin - Begin a read side critical section against a VA * range * @interval_sub: The interval subscription * * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a * collision-retry scheme similar to seqcount for the VA range under * subscription. If the mm invokes invalidation during the critical section * then mmu_interval_read_retry() will return true. * * This is useful to obtain shadow PTEs where teardown or setup of the SPTEs * require a blocking context. 
The critical region formed by this can sleep, * and the required 'user_lock' can also be a sleeping lock. * * The caller is required to provide a 'user_lock' to serialize both teardown * and setup. * * The return value should be passed to mmu_interval_read_retry(). */ unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub) { struct mmu_notifier_subscriptions *subscriptions = interval_sub->mm->notifier_subscriptions; unsigned long seq; bool is_invalidating; /* * If the subscription has a different seq value under the user_lock * than we started with then it has collided. * * If the subscription currently has the same seq value as the * subscriptions seq, then it is currently between * invalidate_start/end and is colliding. * * The locking looks broadly like this: * mn_itree_inv_start(): mmu_interval_read_begin(): * spin_lock * seq = READ_ONCE(interval_sub->invalidate_seq); * seq == subs->invalidate_seq * spin_unlock * spin_lock * seq = ++subscriptions->invalidate_seq * spin_unlock * op->invalidate(): * user_lock * mmu_interval_set_seq() * interval_sub->invalidate_seq = seq * user_unlock * * [Required: mmu_interval_read_retry() == true] * * mn_itree_inv_end(): * spin_lock * seq = ++subscriptions->invalidate_seq * spin_unlock * * user_lock * mmu_interval_read_retry(): * interval_sub->invalidate_seq != seq * user_unlock * * Barriers are not needed here as any races here are closed by an * eventual mmu_interval_read_retry(), which provides a barrier via the * user_lock. */ spin_lock(&subscriptions->lock); /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ seq = READ_ONCE(interval_sub->invalidate_seq); is_invalidating = seq == subscriptions->invalidate_seq; spin_unlock(&subscriptions->lock); /* * interval_sub->invalidate_seq must always be set to an odd value via * mmu_interval_set_seq() using the provided cur_seq from * mn_itree_inv_start_range(). This ensures that if seq does wrap we * will always clear the below sleep in some reasonable time as * subscriptions->invalidate_seq is even in the idle state. */ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); lock_map_release(&__mmu_notifier_invalidate_range_start_map); if (is_invalidating) wait_event(subscriptions->wq, READ_ONCE(subscriptions->invalidate_seq) != seq); /* * Notice that mmu_interval_read_retry() can already be true at this * point, avoiding loops here allows the caller to provide a global * time bound. */ return seq; } EXPORT_SYMBOL_GPL(mmu_interval_read_begin); static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions, struct mm_struct *mm) { struct mmu_notifier_range range = { .flags = MMU_NOTIFIER_RANGE_BLOCKABLE, .event = MMU_NOTIFY_RELEASE, .mm = mm, .start = 0, .end = ULONG_MAX, }; struct mmu_interval_notifier *interval_sub; unsigned long cur_seq; bool ret; for (interval_sub = mn_itree_inv_start_range(subscriptions, &range, &cur_seq); interval_sub; interval_sub = mn_itree_inv_next(interval_sub, &range)) { ret = interval_sub->ops->invalidate(interval_sub, &range, cur_seq); WARN_ON(!ret); } mn_itree_inv_end(subscriptions); } /* * This function can't run concurrently against mmu_notifier_register * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap * runs with mm_users == 0. Other tasks may still invoke mmu notifiers * in parallel despite there being no task using this mm any more, * through the vmas outside of the exit_mmap context, such as with * vmtruncate. 
This serializes against mmu_notifier_unregister with * the notifier_subscriptions->lock in addition to SRCU and it serializes * against the other mmu notifiers with SRCU. struct mmu_notifier_subscriptions * can't go away from under us as exit_mmap holds an mm_count pin * itself. */ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions, struct mm_struct *mm) { struct mmu_notifier *subscription; int id; /* * SRCU here will block mmu_notifier_unregister until * ->release returns. */ id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) /* * If ->release runs before mmu_notifier_unregister it must be * handled, as it's the only way for the driver to flush all * existing sptes and stop the driver from establishing any more * sptes before all the pages in the mm are freed. */ if (subscription->ops->release) subscription->ops->release(subscription, mm); spin_lock(&subscriptions->lock); while (unlikely(!hlist_empty(&subscriptions->list))) { subscription = hlist_entry(subscriptions->list.first, struct mmu_notifier, hlist); /* * We arrived before mmu_notifier_unregister so * mmu_notifier_unregister will do nothing other than to wait * for ->release to finish and for mmu_notifier_unregister to * return. */ hlist_del_init_rcu(&subscription->hlist); } spin_unlock(&subscriptions->lock); srcu_read_unlock(&srcu, id); /* * synchronize_srcu here prevents mmu_notifier_release from returning to * exit_mmap (which would proceed with freeing all pages in the mm) * until the ->release method returns, if it was invoked by * mmu_notifier_unregister. * * The notifier_subscriptions can't go away from under us because * one mm_count is held by exit_mmap. */ synchronize_srcu(&srcu); } void __mmu_notifier_release(struct mm_struct *mm) { struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; if (subscriptions->has_itree) mn_itree_release(subscriptions, mm); if (!hlist_empty(&subscriptions->list)) mn_hlist_release(subscriptions, mm); } /* * If no young bitflag is supported by the hardware, ->clear_flush_young can * unmap the address and return 1 or 0 depending if the mapping previously * existed or not. 
*/ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_flush_young) young |= subscription->ops->clear_flush_young( subscription, mm, start, end); } srcu_read_unlock(&srcu, id); return young; } int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_young) young |= subscription->ops->clear_young(subscription, mm, start, end); } srcu_read_unlock(&srcu, id); return young; } int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->test_young) { young = subscription->ops->test_young(subscription, mm, address); if (young) break; } } srcu_read_unlock(&srcu, id); return young; } static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions, const struct mmu_notifier_range *range) { struct mmu_interval_notifier *interval_sub; unsigned long cur_seq; for (interval_sub = mn_itree_inv_start_range(subscriptions, range, &cur_seq); interval_sub; interval_sub = mn_itree_inv_next(interval_sub, range)) { bool ret; ret = interval_sub->ops->invalidate(interval_sub, range, cur_seq); if (!ret) { if (WARN_ON(mmu_notifier_range_blockable(range))) continue; goto out_would_block; } } return 0; out_would_block: /* * On -EAGAIN the non-blocking caller is not allowed to call * invalidate_range_end() */ mn_itree_inv_end(subscriptions); return -EAGAIN; } static int mn_hlist_invalidate_range_start( struct mmu_notifier_subscriptions *subscriptions, struct mmu_notifier_range *range) { struct mmu_notifier *subscription; int ret = 0; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { const struct mmu_notifier_ops *ops = subscription->ops; if (ops->invalidate_range_start) { int _ret; if (!mmu_notifier_range_blockable(range)) non_block_start(); _ret = ops->invalidate_range_start(subscription, range); if (!mmu_notifier_range_blockable(range)) non_block_end(); if (_ret) { pr_info("%pS callback failed with %d in %sblockable context.\n", ops->invalidate_range_start, _ret, !mmu_notifier_range_blockable(range) ? "non-" : ""); WARN_ON(mmu_notifier_range_blockable(range) || _ret != -EAGAIN); /* * We call all the notifiers on any EAGAIN, * there is no way for a notifier to know if * its start method failed, thus a start that * does EAGAIN can't also do end. */ WARN_ON(ops->invalidate_range_end); ret = _ret; } } } if (ret) { /* * Must be non-blocking to get here. If there are multiple * notifiers and one or more failed start, any that succeeded * start are expecting their end to be called. Do so now. 
*/ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (!subscription->ops->invalidate_range_end) continue; subscription->ops->invalidate_range_end(subscription, range); } } srcu_read_unlock(&srcu, id); return ret; } int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { struct mmu_notifier_subscriptions *subscriptions = range->mm->notifier_subscriptions; int ret; if (subscriptions->has_itree) { ret = mn_itree_invalidate(subscriptions, range); if (ret) return ret; } if (!hlist_empty(&subscriptions->list)) return mn_hlist_invalidate_range_start(subscriptions, range); return 0; } static void mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions, struct mmu_notifier_range *range) { struct mmu_notifier *subscription; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->invalidate_range_end) { if (!mmu_notifier_range_blockable(range)) non_block_start(); subscription->ops->invalidate_range_end(subscription, range); if (!mmu_notifier_range_blockable(range)) non_block_end(); } } srcu_read_unlock(&srcu, id); } void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { struct mmu_notifier_subscriptions *subscriptions = range->mm->notifier_subscriptions; lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (subscriptions->has_itree) mn_itree_inv_end(subscriptions); if (!hlist_empty(&subscriptions->list)) mn_hlist_invalidate_end(subscriptions, range); lock_map_release(&__mmu_notifier_invalidate_range_start_map); } void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->arch_invalidate_secondary_tlbs) subscription->ops->arch_invalidate_secondary_tlbs( subscription, mm, start, end); } srcu_read_unlock(&srcu, id); } /* * Same as mmu_notifier_register but here the caller must hold the mmap_lock in * write mode. A NULL mn signals the notifier is being registered for itree * mode. */ int __mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm) { struct mmu_notifier_subscriptions *subscriptions = NULL; int ret; mmap_assert_write_locked(mm); BUG_ON(atomic_read(&mm->mm_users) <= 0); /* * Subsystems should only register for invalidate_secondary_tlbs() or * invalidate_range_start()/end() callbacks, not both. */ if (WARN_ON_ONCE(subscription && (subscription->ops->arch_invalidate_secondary_tlbs && (subscription->ops->invalidate_range_start || subscription->ops->invalidate_range_end)))) return -EINVAL; if (!mm->notifier_subscriptions) { /* * kmalloc cannot be called under mm_take_all_locks(), but we * know that mm->notifier_subscriptions can't change while we * hold the write side of the mmap_lock. */ subscriptions = kzalloc( sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL); if (!subscriptions) return -ENOMEM; INIT_HLIST_HEAD(&subscriptions->list); spin_lock_init(&subscriptions->lock); subscriptions->invalidate_seq = 2; subscriptions->itree = RB_ROOT_CACHED; init_waitqueue_head(&subscriptions->wq); INIT_HLIST_HEAD(&subscriptions->deferred_list); } ret = mm_take_all_locks(mm); if (unlikely(ret)) goto out_clean; /* * Serialize the update against mmu_notifier_unregister. 
A * side note: mmu_notifier_release can't run concurrently with * us because we hold the mm_users pin (either implicitly as * current->mm or explicitly with get_task_mm() or similar). * We can't race against any other mmu notifier method either * thanks to mm_take_all_locks(). * * release semantics on the initialization of the * mmu_notifier_subscriptions's contents are provided for unlocked * readers. acquire can only be used while holding the mmgrab or * mmget, and is safe because once created the * mmu_notifier_subscriptions is not freed until the mm is destroyed. * As above, users holding the mmap_lock or one of the * mm_take_all_locks() do not need to use acquire semantics. */ if (subscriptions) smp_store_release(&mm->notifier_subscriptions, subscriptions); if (subscription) { /* Pairs with the mmdrop in mmu_notifier_unregister_* */ mmgrab(mm); subscription->mm = mm; subscription->users = 1; spin_lock(&mm->notifier_subscriptions->lock); hlist_add_head_rcu(&subscription->hlist, &mm->notifier_subscriptions->list); spin_unlock(&mm->notifier_subscriptions->lock); } else mm->notifier_subscriptions->has_itree = true; mm_drop_all_locks(mm); BUG_ON(atomic_read(&mm->mm_users) <= 0); return 0; out_clean: kfree(subscriptions); return ret; } EXPORT_SYMBOL_GPL(__mmu_notifier_register); /** * mmu_notifier_register - Register a notifier on a mm * @subscription: The notifier to attach * @mm: The mm to attach the notifier to * * Must not hold mmap_lock nor any other VM related lock when calling * this registration function. Must also ensure mm_users can't go down * to zero while this runs to avoid races with mmu_notifier_release, * so mm has to be current->mm or the mm should be pinned safely such * as with get_task_mm(). If the mm is not current->mm, the mm_users * pin should be released by calling mmput after mmu_notifier_register * returns. * * mmu_notifier_unregister() or mmu_notifier_put() must be always called to * unregister the notifier. * * While the caller has a mmu_notifier get the subscription->mm pointer will remain * valid, and can be converted to an active mm pointer via mmget_not_zero(). */ int mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm) { int ret; mmap_write_lock(mm); ret = __mmu_notifier_register(subscription, mm); mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL_GPL(mmu_notifier_register); static struct mmu_notifier * find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *subscription; spin_lock(&mm->notifier_subscriptions->lock); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, lockdep_is_held(&mm->notifier_subscriptions->lock)) { if (subscription->ops != ops) continue; if (likely(subscription->users != UINT_MAX)) subscription->users++; else subscription = ERR_PTR(-EOVERFLOW); spin_unlock(&mm->notifier_subscriptions->lock); return subscription; } spin_unlock(&mm->notifier_subscriptions->lock); return NULL; } /** * mmu_notifier_get_locked - Return the single struct mmu_notifier for * the mm & ops * @ops: The operations struct being subscribe with * @mm : The mm to attach notifiers too * * This function either allocates a new mmu_notifier via * ops->alloc_notifier(), or returns an already existing notifier on the * list. The value of the ops pointer is used to determine when two notifiers * are the same. * * Each call to mmu_notifier_get() must be paired with a call to * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock. 
* * While the caller has a mmu_notifier get the mm pointer will remain valid, * and can be converted to an active mm pointer via mmget_not_zero(). */ struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, struct mm_struct *mm) { struct mmu_notifier *subscription; int ret; mmap_assert_write_locked(mm); if (mm->notifier_subscriptions) { subscription = find_get_mmu_notifier(mm, ops); if (subscription) return subscription; } subscription = ops->alloc_notifier(mm); if (IS_ERR(subscription)) return subscription; subscription->ops = ops; ret = __mmu_notifier_register(subscription, mm); if (ret) goto out_free; return subscription; out_free: subscription->ops->free_notifier(subscription); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(mmu_notifier_get_locked); /* this is called after the last mmu_notifier_unregister() returned */ void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { BUG_ON(!hlist_empty(&mm->notifier_subscriptions->list)); kfree(mm->notifier_subscriptions); mm->notifier_subscriptions = LIST_POISON1; /* debug */ } /* * This releases the mm_count pin automatically and frees the mm * structure if it was the last user of it. It serializes against * running mmu notifiers with SRCU and against mmu_notifier_unregister * with the unregister lock + SRCU. All sptes must be dropped before * calling mmu_notifier_unregister. ->release or any other notifier * method may be invoked concurrently with mmu_notifier_unregister, * and only after mmu_notifier_unregister returned we're guaranteed * that ->release or any other method can't run anymore. */ void mmu_notifier_unregister(struct mmu_notifier *subscription, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_count) <= 0); if (!hlist_unhashed(&subscription->hlist)) { /* * SRCU here will force exit_mmap to wait for ->release to * finish before freeing the pages. */ int id; id = srcu_read_lock(&srcu); /* * exit_mmap will block in mmu_notifier_release to guarantee * that ->release is called before freeing the pages. */ if (subscription->ops->release) subscription->ops->release(subscription, mm); srcu_read_unlock(&srcu, id); spin_lock(&mm->notifier_subscriptions->lock); /* * Can not use list_del_rcu() since __mmu_notifier_release * can delete it before we hold the lock. */ hlist_del_init_rcu(&subscription->hlist); spin_unlock(&mm->notifier_subscriptions->lock); } /* * Wait for any running method to finish, of course including * ->release if it was run by mmu_notifier_release instead of us. */ synchronize_srcu(&srcu); BUG_ON(atomic_read(&mm->mm_count) <= 0); mmdrop(mm); } EXPORT_SYMBOL_GPL(mmu_notifier_unregister); static void mmu_notifier_free_rcu(struct rcu_head *rcu) { struct mmu_notifier *subscription = container_of(rcu, struct mmu_notifier, rcu); struct mm_struct *mm = subscription->mm; subscription->ops->free_notifier(subscription); /* Pairs with the get in __mmu_notifier_register() */ mmdrop(mm); } /** * mmu_notifier_put - Release the reference on the notifier * @subscription: The notifier to act on * * This function must be paired with each mmu_notifier_get(), it releases the * reference obtained by the get. If this is the last reference then process * to free the notifier will be run asynchronously. * * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release * when the mm_struct is destroyed. Instead free_notifier is always called to * release any resources held by the user. 
* * As ops->release is not guaranteed to be called, the user must ensure that * all sptes are dropped, and no new sptes can be established before * mmu_notifier_put() is called. * * This function can be called from the ops->release callback, however the * caller must still ensure it is called pairwise with mmu_notifier_get(). * * Modules calling this function must call mmu_notifier_synchronize() in * their __exit functions to ensure the async work is completed. */ void mmu_notifier_put(struct mmu_notifier *subscription) { struct mm_struct *mm = subscription->mm; spin_lock(&mm->notifier_subscriptions->lock); if (WARN_ON(!subscription->users) || --subscription->users) goto out_unlock; hlist_del_init_rcu(&subscription->hlist); spin_unlock(&mm->notifier_subscriptions->lock); call_srcu(&srcu, &subscription->rcu, mmu_notifier_free_rcu); return; out_unlock: spin_unlock(&mm->notifier_subscriptions->lock); } EXPORT_SYMBOL_GPL(mmu_notifier_put); static int __mmu_interval_notifier_insert( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, struct mmu_notifier_subscriptions *subscriptions, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { interval_sub->mm = mm; interval_sub->ops = ops; RB_CLEAR_NODE(&interval_sub->interval_tree.rb); interval_sub->interval_tree.start = start; /* * Note that the representation of the intervals in the interval tree * considers the ending point as contained in the interval. */ if (length == 0 || check_add_overflow(start, length - 1, &interval_sub->interval_tree.last)) return -EOVERFLOW; /* Must call with a mmget() held */ if (WARN_ON(atomic_read(&mm->mm_users) <= 0)) return -EINVAL; /* pairs with mmdrop in mmu_interval_notifier_remove() */ mmgrab(mm); /* * If some invalidate_range_start/end region is going on in parallel * we don't know what VA ranges are affected, so we must assume this * new range is included. * * If the itree is invalidating then we are not allowed to change * it. Retrying until invalidation is done is tricky due to the * possibility for live lock, instead defer the add to * mn_itree_inv_end() so this algorithm is deterministic. * * In all cases the value for the interval_sub->invalidate_seq should be * odd, see mmu_interval_read_begin() */ spin_lock(&subscriptions->lock); if (subscriptions->active_invalidate_ranges) { if (mn_itree_is_invalidating(subscriptions)) hlist_add_head(&interval_sub->deferred_item, &subscriptions->deferred_list); else { subscriptions->invalidate_seq |= 1; interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); } interval_sub->invalidate_seq = subscriptions->invalidate_seq; } else { WARN_ON(mn_itree_is_invalidating(subscriptions)); /* * The starting seq for a subscription not under invalidation * should be odd, not equal to the current invalidate_seq and * invalidate_seq should not 'wrap' to the new seq any time * soon. */ interval_sub->invalidate_seq = subscriptions->invalidate_seq - 1; interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); } spin_unlock(&subscriptions->lock); return 0; } /** * mmu_interval_notifier_insert - Insert an interval notifier * @interval_sub: Interval subscription to register * @start: Starting virtual address to monitor * @length: Length of the range to monitor * @mm: mm_struct to attach to * @ops: Interval notifier operations to be called on matching events * * This function subscribes the interval notifier for notifications from the * mm. 
Upon return the ops related to mmu_interval_notifier will be called * whenever an event that intersects with the given range occurs. * * Upon return the range_notifier may not be present in the interval tree yet. * The caller must use the normal interval notifier read flow via * mmu_interval_read_begin() to establish SPTEs for this range. */ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { struct mmu_notifier_subscriptions *subscriptions; int ret; might_lock(&mm->mmap_lock); subscriptions = smp_load_acquire(&mm->notifier_subscriptions); if (!subscriptions || !subscriptions->has_itree) { ret = mmu_notifier_register(NULL, mm); if (ret) return ret; subscriptions = mm->notifier_subscriptions; } return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, start, length, ops); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert); int mmu_interval_notifier_insert_locked( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; int ret; mmap_assert_write_locked(mm); if (!subscriptions || !subscriptions->has_itree) { ret = __mmu_notifier_register(NULL, mm); if (ret) return ret; subscriptions = mm->notifier_subscriptions; } return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, start, length, ops); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked); static bool mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions, unsigned long seq) { bool ret; spin_lock(&subscriptions->lock); ret = subscriptions->invalidate_seq != seq; spin_unlock(&subscriptions->lock); return ret; } /** * mmu_interval_notifier_remove - Remove a interval notifier * @interval_sub: Interval subscription to unregister * * This function must be paired with mmu_interval_notifier_insert(). It cannot * be called from any ops callback. * * Once this returns ops callbacks are no longer running on other CPUs and * will not be called in future. */ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub) { struct mm_struct *mm = interval_sub->mm; struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; unsigned long seq = 0; might_sleep(); spin_lock(&subscriptions->lock); if (mn_itree_is_invalidating(subscriptions)) { /* * remove is being called after insert put this on the * deferred list, but before the deferred list was processed. */ if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) { hlist_del(&interval_sub->deferred_item); } else { hlist_add_head(&interval_sub->deferred_item, &subscriptions->deferred_list); seq = subscriptions->invalidate_seq; } } else { WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb)); interval_tree_remove(&interval_sub->interval_tree, &subscriptions->itree); } spin_unlock(&subscriptions->lock); /* * The possible sleep on progress in the invalidation requires the * caller not hold any locks held by invalidation callbacks. 
*/ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); lock_map_release(&__mmu_notifier_invalidate_range_start_map); if (seq) wait_event(subscriptions->wq, mmu_interval_seq_released(subscriptions, seq)); /* pairs with mmgrab in mmu_interval_notifier_insert() */ mmdrop(mm); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove); /** * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed * * This function ensures that all outstanding async SRCU work from * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops * associated with an unused mmu_notifier will no longer be called. * * Before using it, the caller must ensure that all of its mmu_notifiers have * been fully released via mmu_notifier_put(). * * Modules using the mmu_notifier_put() API should call this in their __exit * function to avoid module unloading races. */ void mmu_notifier_synchronize(void) { synchronize_srcu(&srcu); } EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
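/*
 * Usage sketch (illustrative only, not part of mm/mmu_notifier.c): the
 * collision-retry pattern documented above, as a hypothetical driver would
 * use it.  my_mirror, my_invalidate(), my_mirror_init() and my_fault() are
 * made-up names; mmu_interval_set_seq(), mmu_interval_read_retry() and
 * mmu_interval_notifier_insert() come from <linux/mmu_notifier.h> and the
 * code above.
 */
struct my_mirror {
	struct mmu_interval_notifier notifier;
	struct mutex lock;	/* the 'user_lock' from the comments above */
};

static bool my_invalidate(struct mmu_interval_notifier *sub,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	struct my_mirror *m = container_of(sub, struct my_mirror, notifier);

	/* refusing a non-blockable invalidation makes the core return -EAGAIN */
	if (!mmu_notifier_range_blockable(range))
		return false;

	mutex_lock(&m->lock);
	mmu_interval_set_seq(sub, cur_seq);	/* record the collision */
	/* ... tear down shadow PTEs covering range->start .. range->end ... */
	mutex_unlock(&m->lock);
	return true;
}

static const struct mmu_interval_notifier_ops my_mirror_ops = {
	.invalidate = my_invalidate,
};

static int my_mirror_init(struct my_mirror *m, struct mm_struct *mm,
			  unsigned long start, unsigned long length)
{
	mutex_init(&m->lock);
	return mmu_interval_notifier_insert(&m->notifier, mm, start, length,
					    &my_mirror_ops);
}

static int my_fault(struct my_mirror *m)
{
	unsigned long seq;

	do {
		seq = mmu_interval_read_begin(&m->notifier);
		/* ... gather pages and build shadow PTEs, may sleep ... */
		mutex_lock(&m->lock);
		if (mmu_interval_read_retry(&m->notifier, seq)) {
			/* collided with an invalidation, start over */
			mutex_unlock(&m->lock);
			continue;
		}
		/* ... install the shadow PTEs under m->lock ... */
		mutex_unlock(&m->lock);
		return 0;
	} while (true);
}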
// SPDX-License-Identifier: GPL-2.0-only /*************************************************************************** * Copyright (C) 2010-2012 by Bruno Prémont <bonbons@linux-vserver.org> * * * * Based on Logitech G13 driver (v0.4) * * Copyright (C) 2009 by Rick L. Vinyard, Jr. <rvinyard@cs.nmsu.edu> * * * ***************************************************************************/ #include <linux/hid.h> #include <linux/vmalloc.h> #include <linux/fb.h> #include <linux/module.h> #include "hid-picolcd.h" /* Framebuffer * * The PicoLCD uses a Topway LCD module of 256x64 pixels. * This display area is tiled over 4 controllers with 8 tiles * each. Each tile has 8x64 pixels, each data byte representing * a 1-bit wide vertical line of the tile. * * The display can be updated at a tile granularity. * * Chip 1 Chip 2 Chip 3 Chip 4 * +----------------+----------------+----------------+----------------+ * | Tile 1 | Tile 1 | Tile 1 | Tile 1 | * +----------------+----------------+----------------+----------------+ * | Tile 2 | Tile 2 | Tile 2 | Tile 2 | * +----------------+----------------+----------------+----------------+ * ...
* +----------------+----------------+----------------+----------------+ * | Tile 8 | Tile 8 | Tile 8 | Tile 8 | * +----------------+----------------+----------------+----------------+ */ #define PICOLCDFB_NAME "picolcdfb" #define PICOLCDFB_WIDTH (256) #define PICOLCDFB_HEIGHT (64) #define PICOLCDFB_SIZE (PICOLCDFB_WIDTH * PICOLCDFB_HEIGHT / 8) #define PICOLCDFB_UPDATE_RATE_LIMIT 10 #define PICOLCDFB_UPDATE_RATE_DEFAULT 2 /* Framebuffer visual structures */ static const struct fb_fix_screeninfo picolcdfb_fix = { .id = PICOLCDFB_NAME, .type = FB_TYPE_PACKED_PIXELS, .visual = FB_VISUAL_MONO01, .xpanstep = 0, .ypanstep = 0, .ywrapstep = 0, .line_length = PICOLCDFB_WIDTH / 8, .accel = FB_ACCEL_NONE, }; static const struct fb_var_screeninfo picolcdfb_var = { .xres = PICOLCDFB_WIDTH, .yres = PICOLCDFB_HEIGHT, .xres_virtual = PICOLCDFB_WIDTH, .yres_virtual = PICOLCDFB_HEIGHT, .width = 103, .height = 26, .bits_per_pixel = 1, .grayscale = 1, .red = { .offset = 0, .length = 1, .msb_right = 0, }, .green = { .offset = 0, .length = 1, .msb_right = 0, }, .blue = { .offset = 0, .length = 1, .msb_right = 0, }, .transp = { .offset = 0, .length = 0, .msb_right = 0, }, }; /* Send a given tile to PicoLCD */ static int picolcd_fb_send_tile(struct picolcd_data *data, u8 *vbitmap, int chip, int tile) { struct hid_report *report1, *report2; unsigned long flags; u8 *tdata; int i; report1 = picolcd_out_report(REPORT_LCD_CMD_DATA, data->hdev); if (!report1 || report1->maxfield != 1) return -ENODEV; report2 = picolcd_out_report(REPORT_LCD_DATA, data->hdev); if (!report2 || report2->maxfield != 1) return -ENODEV; spin_lock_irqsave(&data->lock, flags); if ((data->status & PICOLCD_FAILED)) { spin_unlock_irqrestore(&data->lock, flags); return -ENODEV; } hid_set_field(report1->field[0], 0, chip << 2); hid_set_field(report1->field[0], 1, 0x02); hid_set_field(report1->field[0], 2, 0x00); hid_set_field(report1->field[0], 3, 0x00); hid_set_field(report1->field[0], 4, 0xb8 | tile); hid_set_field(report1->field[0], 5, 0x00); hid_set_field(report1->field[0], 6, 0x00); hid_set_field(report1->field[0], 7, 0x40); hid_set_field(report1->field[0], 8, 0x00); hid_set_field(report1->field[0], 9, 0x00); hid_set_field(report1->field[0], 10, 32); hid_set_field(report2->field[0], 0, (chip << 2) | 0x01); hid_set_field(report2->field[0], 1, 0x00); hid_set_field(report2->field[0], 2, 0x00); hid_set_field(report2->field[0], 3, 32); tdata = vbitmap + (tile * 4 + chip) * 64; for (i = 0; i < 64; i++) if (i < 32) hid_set_field(report1->field[0], 11 + i, tdata[i]); else hid_set_field(report2->field[0], 4 + i - 32, tdata[i]); hid_hw_request(data->hdev, report1, HID_REQ_SET_REPORT); hid_hw_request(data->hdev, report2, HID_REQ_SET_REPORT); spin_unlock_irqrestore(&data->lock, flags); return 0; } /* Translate a single tile*/ static int picolcd_fb_update_tile(u8 *vbitmap, const u8 *bitmap, int bpp, int chip, int tile) { int i, b, changed = 0; u8 tdata[64]; u8 *vdata = vbitmap + (tile * 4 + chip) * 64; if (bpp == 1) { for (b = 7; b >= 0; b--) { const u8 *bdata = bitmap + tile * 256 + chip * 8 + b * 32; for (i = 0; i < 64; i++) { tdata[i] <<= 1; tdata[i] |= (bdata[i/8] >> (i % 8)) & 0x01; } } } else if (bpp == 8) { for (b = 7; b >= 0; b--) { const u8 *bdata = bitmap + (tile * 256 + chip * 8 + b * 32) * 8; for (i = 0; i < 64; i++) { tdata[i] <<= 1; tdata[i] |= (bdata[i] & 0x80) ? 0x01 : 0x00; } } } else { /* Oops, we should never get here! 
*/ WARN_ON(1); return 0; } for (i = 0; i < 64; i++) if (tdata[i] != vdata[i]) { changed = 1; vdata[i] = tdata[i]; } return changed; } void picolcd_fb_refresh(struct picolcd_data *data) { if (data->fb_info) schedule_delayed_work(&data->fb_info->deferred_work, 0); } /* Reconfigure LCD display */ int picolcd_fb_reset(struct picolcd_data *data, int clear) { struct hid_report *report = picolcd_out_report(REPORT_LCD_CMD, data->hdev); struct picolcd_fb_data *fbdata = data->fb_info->par; int i, j; unsigned long flags; static const u8 mapcmd[8] = { 0x00, 0x02, 0x00, 0x64, 0x3f, 0x00, 0x64, 0xc0 }; if (!report || report->maxfield != 1) return -ENODEV; spin_lock_irqsave(&data->lock, flags); for (i = 0; i < 4; i++) { for (j = 0; j < report->field[0]->maxusage; j++) if (j == 0) hid_set_field(report->field[0], j, i << 2); else if (j < sizeof(mapcmd)) hid_set_field(report->field[0], j, mapcmd[j]); else hid_set_field(report->field[0], j, 0); hid_hw_request(data->hdev, report, HID_REQ_SET_REPORT); } spin_unlock_irqrestore(&data->lock, flags); if (clear) { memset(fbdata->vbitmap, 0, PICOLCDFB_SIZE); memset(fbdata->bitmap, 0, PICOLCDFB_SIZE*fbdata->bpp); } fbdata->force = 1; /* schedule first output of framebuffer */ if (fbdata->ready) schedule_delayed_work(&data->fb_info->deferred_work, 0); else fbdata->ready = 1; return 0; } /* Update fb_vbitmap from the screen_buffer and send changed tiles to device */ static void picolcd_fb_update(struct fb_info *info) { int chip, tile, n; unsigned long flags; struct picolcd_fb_data *fbdata = info->par; struct picolcd_data *data; mutex_lock(&info->lock); spin_lock_irqsave(&fbdata->lock, flags); if (!fbdata->ready && fbdata->picolcd) picolcd_fb_reset(fbdata->picolcd, 0); spin_unlock_irqrestore(&fbdata->lock, flags); /* * Translate the framebuffer into the format needed by the PicoLCD. * See display layout above. * Do this one tile after the other and push those tiles that changed. * * Wait for our IO to complete as otherwise we might flood the queue! */ n = 0; for (chip = 0; chip < 4; chip++) for (tile = 0; tile < 8; tile++) { if (!fbdata->force && !picolcd_fb_update_tile( fbdata->vbitmap, fbdata->bitmap, fbdata->bpp, chip, tile)) continue; n += 2; if (n >= HID_OUTPUT_FIFO_SIZE / 2) { spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); mutex_unlock(&info->lock); if (!data) return; hid_hw_wait(data->hdev); mutex_lock(&info->lock); n = 0; } spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); if (!data || picolcd_fb_send_tile(data, fbdata->vbitmap, chip, tile)) goto out; } fbdata->force = false; if (n) { spin_lock_irqsave(&fbdata->lock, flags); data = fbdata->picolcd; spin_unlock_irqrestore(&fbdata->lock, flags); mutex_unlock(&info->lock); if (data) hid_hw_wait(data->hdev); return; } out: mutex_unlock(&info->lock); } static int picolcd_fb_blank(int blank, struct fb_info *info) { /* We let fb notification do this for us via lcd/backlight device */ return 0; } static void picolcd_fb_destroy(struct fb_info *info) { struct picolcd_fb_data *fbdata = info->par; /* make sure no work is deferred */ fb_deferred_io_cleanup(info); /* No thirdparty should ever unregister our framebuffer! 
*/ WARN_ON(fbdata->picolcd != NULL); vfree((u8 *)info->fix.smem_start); framebuffer_release(info); } static int picolcd_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { __u32 bpp = var->bits_per_pixel; __u32 activate = var->activate; /* only allow 1/8 bit depth (8-bit is grayscale) */ *var = picolcdfb_var; var->activate = activate; if (bpp >= 8) { var->bits_per_pixel = 8; var->red.length = 8; var->green.length = 8; var->blue.length = 8; } else { var->bits_per_pixel = 1; var->red.length = 1; var->green.length = 1; var->blue.length = 1; } return 0; } static int picolcd_set_par(struct fb_info *info) { struct picolcd_fb_data *fbdata = info->par; u8 *tmp_fb, *o_fb; if (info->var.bits_per_pixel == fbdata->bpp) return 0; /* switch between 1/8 bit depths */ if (info->var.bits_per_pixel != 1 && info->var.bits_per_pixel != 8) return -EINVAL; o_fb = fbdata->bitmap; tmp_fb = kmalloc_array(PICOLCDFB_SIZE, info->var.bits_per_pixel, GFP_KERNEL); if (!tmp_fb) return -ENOMEM; /* translate FB content to new bits-per-pixel */ if (info->var.bits_per_pixel == 1) { int i, b; for (i = 0; i < PICOLCDFB_SIZE; i++) { u8 p = 0; for (b = 0; b < 8; b++) { p <<= 1; p |= o_fb[i*8+b] ? 0x01 : 0x00; } tmp_fb[i] = p; } memcpy(o_fb, tmp_fb, PICOLCDFB_SIZE); info->fix.visual = FB_VISUAL_MONO01; info->fix.line_length = PICOLCDFB_WIDTH / 8; } else { int i; memcpy(tmp_fb, o_fb, PICOLCDFB_SIZE); for (i = 0; i < PICOLCDFB_SIZE * 8; i++) o_fb[i] = tmp_fb[i/8] & (0x01 << (7 - i % 8)) ? 0xff : 0x00; info->fix.visual = FB_VISUAL_DIRECTCOLOR; info->fix.line_length = PICOLCDFB_WIDTH; } kfree(tmp_fb); fbdata->bpp = info->var.bits_per_pixel; return 0; } static void picolcdfb_ops_damage_range(struct fb_info *info, off_t off, size_t len) { if (!info->par) return; schedule_delayed_work(&info->deferred_work, 0); } static void picolcdfb_ops_damage_area(struct fb_info *info, u32 x, u32 y, u32 width, u32 height) { if (!info->par) return; schedule_delayed_work(&info->deferred_work, 0); } FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(picolcdfb_ops, picolcdfb_ops_damage_range, picolcdfb_ops_damage_area) static const struct fb_ops picolcdfb_ops = { .owner = THIS_MODULE, FB_DEFAULT_DEFERRED_OPS(picolcdfb_ops), .fb_destroy = picolcd_fb_destroy, .fb_blank = picolcd_fb_blank, .fb_check_var = picolcd_fb_check_var, .fb_set_par = picolcd_set_par, }; /* Callback from deferred IO workqueue */ static void picolcd_fb_deferred_io(struct fb_info *info, struct list_head *pagereflist) { picolcd_fb_update(info); } static const struct fb_deferred_io picolcd_fb_defio = { .delay = HZ / PICOLCDFB_UPDATE_RATE_DEFAULT, .deferred_io = picolcd_fb_deferred_io, }; /* * The "fb_update_rate" sysfs attribute */ static ssize_t picolcd_fb_update_rate_show(struct device *dev, struct device_attribute *attr, char *buf) { struct picolcd_data *data = dev_get_drvdata(dev); struct picolcd_fb_data *fbdata = data->fb_info->par; unsigned i, fb_update_rate = fbdata->update_rate; size_t ret = 0; for (i = 1; i <= PICOLCDFB_UPDATE_RATE_LIMIT; i++) if (i == fb_update_rate) ret += sysfs_emit_at(buf, ret, "[%u] ", i); else ret += sysfs_emit_at(buf, ret, "%u ", i); if (ret > 0) buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n'; return ret; } static ssize_t picolcd_fb_update_rate_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct picolcd_data *data = dev_get_drvdata(dev); struct picolcd_fb_data *fbdata = data->fb_info->par; int i; unsigned u; if (count < 1 || count > 10) return -EINVAL; i = sscanf(buf, "%u", &u); if (i != 1) return -EINVAL; if (u > 
PICOLCDFB_UPDATE_RATE_LIMIT) return -ERANGE; else if (u == 0) u = PICOLCDFB_UPDATE_RATE_DEFAULT; fbdata->update_rate = u; data->fb_info->fbdefio->delay = HZ / fbdata->update_rate; return count; } static DEVICE_ATTR(fb_update_rate, 0664, picolcd_fb_update_rate_show, picolcd_fb_update_rate_store); /* initialize Framebuffer device */ int picolcd_init_framebuffer(struct picolcd_data *data) { struct device *dev = &data->hdev->dev; struct fb_info *info = NULL; struct picolcd_fb_data *fbdata = NULL; int i, error = -ENOMEM; u32 *palette; /* The extra memory is: * - 256*u32 for pseudo_palette * - struct fb_deferred_io */ info = framebuffer_alloc(256 * sizeof(u32) + sizeof(struct fb_deferred_io) + sizeof(struct picolcd_fb_data) + PICOLCDFB_SIZE, dev); if (!info) goto err_nomem; info->fbdefio = info->par; *info->fbdefio = picolcd_fb_defio; info->par += sizeof(struct fb_deferred_io); palette = info->par; info->par += 256 * sizeof(u32); for (i = 0; i < 256; i++) palette[i] = i > 0 && i < 16 ? 0xff : 0; info->pseudo_palette = palette; info->fbops = &picolcdfb_ops; info->var = picolcdfb_var; info->fix = picolcdfb_fix; info->fix.smem_len = PICOLCDFB_SIZE*8; #ifdef CONFIG_FB_BACKLIGHT #ifdef CONFIG_HID_PICOLCD_BACKLIGHT info->bl_dev = data->backlight; #endif #endif #ifdef CONFIG_HID_PICOLCD_LCD info->lcd_dev = data->lcd; #endif fbdata = info->par; spin_lock_init(&fbdata->lock); fbdata->picolcd = data; fbdata->update_rate = PICOLCDFB_UPDATE_RATE_DEFAULT; fbdata->bpp = picolcdfb_var.bits_per_pixel; fbdata->force = 1; fbdata->vbitmap = info->par + sizeof(struct picolcd_fb_data); fbdata->bitmap = vmalloc(PICOLCDFB_SIZE*8); if (fbdata->bitmap == NULL) { dev_err(dev, "can't get a free page for framebuffer\n"); goto err_nomem; } info->flags |= FBINFO_VIRTFB; info->screen_buffer = fbdata->bitmap; info->fix.smem_start = (unsigned long)fbdata->bitmap; memset(fbdata->vbitmap, 0xff, PICOLCDFB_SIZE); data->fb_info = info; error = picolcd_fb_reset(data, 1); if (error) { dev_err(dev, "failed to configure display\n"); goto err_cleanup; } error = device_create_file(dev, &dev_attr_fb_update_rate); if (error) { dev_err(dev, "failed to create sysfs attributes\n"); goto err_cleanup; } fb_deferred_io_init(info); error = register_framebuffer(info); if (error) { dev_err(dev, "failed to register framebuffer\n"); goto err_sysfs; } return 0; err_sysfs: device_remove_file(dev, &dev_attr_fb_update_rate); fb_deferred_io_cleanup(info); err_cleanup: data->fb_info = NULL; err_nomem: if (fbdata) vfree(fbdata->bitmap); framebuffer_release(info); return error; } void picolcd_exit_framebuffer(struct picolcd_data *data) { struct fb_info *info = data->fb_info; struct picolcd_fb_data *fbdata; unsigned long flags; if (!info) return; device_remove_file(&data->hdev->dev, &dev_attr_fb_update_rate); fbdata = info->par; /* disconnect framebuffer from HID dev */ spin_lock_irqsave(&fbdata->lock, flags); fbdata->picolcd = NULL; spin_unlock_irqrestore(&fbdata->lock, flags); /* make sure there is no running update - thus that fbdata->picolcd * once obtained under lock is guaranteed not to get free() under * the feet of the deferred work */ flush_delayed_work(&info->deferred_work); data->fb_info = NULL; unregister_framebuffer(info); } |
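/*
 * Illustrative userspace sketch (not part of the driver above): set one pixel
 * through the fbdev node that hid-picolcd registers.  The device path
 * "/dev/fb1" and the pixel coordinates are assumptions for the example; the
 * layout assumed here (1 bpp, FB_VISUAL_MONO01, line_length = xres / 8,
 * MSB-first within a byte) follows picolcdfb_fix, picolcdfb_var and the
 * driver's 1<->8 bpp conversion in picolcd_set_par().
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fb.h>

int main(void)
{
	struct fb_var_screeninfo var;
	struct fb_fix_screeninfo fix;
	unsigned int x = 10, y = 20;	/* example pixel */
	uint8_t byte;
	off_t off;
	int fd;

	fd = open("/dev/fb1", O_RDWR);	/* assumed PicoLCD fb node */
	if (fd < 0)
		return 1;
	if (ioctl(fd, FBIOGET_VSCREENINFO, &var) ||
	    ioctl(fd, FBIOGET_FSCREENINFO, &fix))
		return 1;
	if (var.bits_per_pixel != 1)	/* sketch only covers the default mode */
		return 1;

	/* 1 bpp: each byte holds 8 horizontal pixels, bit 7 is the leftmost */
	off = (off_t)y * fix.line_length + x / 8;
	if (pread(fd, &byte, 1, off) != 1)
		return 1;
	byte |= 0x80 >> (x % 8);	/* MONO01: 1 = black */
	if (pwrite(fd, &byte, 1, off) != 1)
		return 1;

	printf("set pixel (%u,%u) on %ux%u display\n", x, y, var.xres, var.yres);
	close(fd);
	return 0;
}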
// SPDX-License-Identifier: GPL-2.0-or-later /* i2c-dev.c - i2c-bus driver, char device interface Copyright (C) 1995-97 Simon G.
Vogl Copyright (C) 1998-99 Frodo Looijaard <frodol@dds.nl> Copyright (C) 2003 Greg Kroah-Hartman <greg@kroah.com> */ /* Note that this is a complete rewrite of Simon Vogl's i2c-dev module. But I have used so much of his original code and ideas that it seems only fair to recognize him as co-author -- Frodo */ /* The I2C_RDWR ioctl code is written by Kolja Waschk <waschk@telos.de> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> #include <linux/fs.h> #include <linux/i2c-dev.h> #include <linux/i2c.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/slab.h> #include <linux/uaccess.h> /* * An i2c_dev represents an i2c_adapter ... an I2C or SMBus master, not a * slave (i2c_client) with which messages will be exchanged. It's coupled * with a character special file which is accessed by user mode drivers. * * The list of i2c_dev structures is parallel to the i2c_adapter lists * maintained by the driver model, and is updated using bus notifications. */ struct i2c_dev { struct list_head list; struct i2c_adapter *adap; struct device dev; struct cdev cdev; }; #define I2C_MINORS (MINORMASK + 1) static LIST_HEAD(i2c_dev_list); static DEFINE_SPINLOCK(i2c_dev_list_lock); static struct i2c_dev *i2c_dev_get_by_minor(unsigned index) { struct i2c_dev *i2c_dev; spin_lock(&i2c_dev_list_lock); list_for_each_entry(i2c_dev, &i2c_dev_list, list) { if (i2c_dev->adap->nr == index) goto found; } i2c_dev = NULL; found: spin_unlock(&i2c_dev_list_lock); return i2c_dev; } static struct i2c_dev *get_free_i2c_dev(struct i2c_adapter *adap) { struct i2c_dev *i2c_dev; if (adap->nr >= I2C_MINORS) { pr_err("Out of device minors (%d)\n", adap->nr); return ERR_PTR(-ENODEV); } i2c_dev = kzalloc(sizeof(*i2c_dev), GFP_KERNEL); if (!i2c_dev) return ERR_PTR(-ENOMEM); i2c_dev->adap = adap; spin_lock(&i2c_dev_list_lock); list_add_tail(&i2c_dev->list, &i2c_dev_list); spin_unlock(&i2c_dev_list_lock); return i2c_dev; } static void put_i2c_dev(struct i2c_dev *i2c_dev, bool del_cdev) { spin_lock(&i2c_dev_list_lock); list_del(&i2c_dev->list); spin_unlock(&i2c_dev_list_lock); if (del_cdev) cdev_device_del(&i2c_dev->cdev, &i2c_dev->dev); put_device(&i2c_dev->dev); } static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct i2c_dev *i2c_dev = i2c_dev_get_by_minor(MINOR(dev->devt)); if (!i2c_dev) return -ENODEV; return sysfs_emit(buf, "%s\n", i2c_dev->adap->name); } static DEVICE_ATTR_RO(name); static struct attribute *i2c_attrs[] = { &dev_attr_name.attr, NULL, }; ATTRIBUTE_GROUPS(i2c); /* ------------------------------------------------------------------------- */ /* * After opening an instance of this character special file, a file * descriptor starts out associated only with an i2c_adapter (and bus). * * Using the I2C_RDWR ioctl(), you can then *immediately* issue i2c_msg * traffic to any devices on the bus used by that adapter. That's because * the i2c_msg vectors embed all the addressing information they need, and * are submitted directly to an i2c_adapter. However, SMBus-only adapters * don't support that interface. * * To use read()/write() system calls on that file descriptor, or to use * SMBus interfaces (and work with SMBus-only hosts!), you must first issue * an I2C_SLAVE (or I2C_SLAVE_FORCE) ioctl. 
That configures an anonymous * (never registered) i2c_client so it holds the addressing information * needed by those system calls and by this SMBus interface. */ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { char *tmp; int ret; struct i2c_client *client = file->private_data; /* Adapter must support I2C transfers */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -EOPNOTSUPP; if (count > 8192) count = 8192; tmp = kzalloc(count, GFP_KERNEL); if (tmp == NULL) return -ENOMEM; pr_debug("i2c-%d reading %zu bytes.\n", iminor(file_inode(file)), count); ret = i2c_master_recv(client, tmp, count); if (ret >= 0) if (copy_to_user(buf, tmp, ret)) ret = -EFAULT; kfree(tmp); return ret; } static ssize_t i2cdev_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { int ret; char *tmp; struct i2c_client *client = file->private_data; /* Adapter must support I2C transfers */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -EOPNOTSUPP; if (count > 8192) count = 8192; tmp = memdup_user(buf, count); if (IS_ERR(tmp)) return PTR_ERR(tmp); pr_debug("i2c-%d writing %zu bytes.\n", iminor(file_inode(file)), count); ret = i2c_master_send(client, tmp, count); kfree(tmp); return ret; } static int i2cdev_check(struct device *dev, void *addrp) { struct i2c_client *client = i2c_verify_client(dev); if (!client || client->addr != *(unsigned int *)addrp) return 0; return dev->driver ? -EBUSY : 0; } /* walk up mux tree */ static int i2cdev_check_mux_parents(struct i2c_adapter *adapter, int addr) { struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter); int result; result = device_for_each_child(&adapter->dev, &addr, i2cdev_check); if (!result && parent) result = i2cdev_check_mux_parents(parent, addr); return result; } /* recurse down mux tree */ static int i2cdev_check_mux_children(struct device *dev, void *addrp) { int result; if (dev->type == &i2c_adapter_type) result = device_for_each_child(dev, addrp, i2cdev_check_mux_children); else result = i2cdev_check(dev, addrp); return result; } /* This address checking function differs from the one in i2c-core in that it considers an address with a registered device, but no driver bound to it, as NOT busy. 
*/ static int i2cdev_check_addr(struct i2c_adapter *adapter, unsigned int addr) { struct i2c_adapter *parent = i2c_parent_is_i2c_adapter(adapter); int result = 0; if (parent) result = i2cdev_check_mux_parents(parent, addr); if (!result) result = device_for_each_child(&adapter->dev, &addr, i2cdev_check_mux_children); return result; } static noinline int i2cdev_ioctl_rdwr(struct i2c_client *client, unsigned nmsgs, struct i2c_msg *msgs) { u8 __user **data_ptrs; int i, res; /* Adapter must support I2C transfers */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -EOPNOTSUPP; data_ptrs = kmalloc_array(nmsgs, sizeof(u8 __user *), GFP_KERNEL); if (!data_ptrs) return -ENOMEM; res = 0; for (i = 0; i < nmsgs; i++) { /* Limit the size of the message to a sane amount */ if (msgs[i].len > 8192) { res = -EINVAL; break; } data_ptrs[i] = (u8 __user *)msgs[i].buf; msgs[i].buf = memdup_user(data_ptrs[i], msgs[i].len); if (IS_ERR(msgs[i].buf)) { res = PTR_ERR(msgs[i].buf); break; } /* memdup_user allocates with GFP_KERNEL, so DMA is ok */ msgs[i].flags |= I2C_M_DMA_SAFE; /* * If the message length is received from the slave (similar * to SMBus block read), we must ensure that the buffer will * be large enough to cope with a message length of * I2C_SMBUS_BLOCK_MAX as this is the maximum underlying bus * drivers allow. The first byte in the buffer must be * pre-filled with the number of extra bytes, which must be * at least one to hold the message length, but can be * greater (for example to account for a checksum byte at * the end of the message.) */ if (msgs[i].flags & I2C_M_RECV_LEN) { if (!(msgs[i].flags & I2C_M_RD) || msgs[i].len < 1 || msgs[i].buf[0] < 1 || msgs[i].len < msgs[i].buf[0] + I2C_SMBUS_BLOCK_MAX) { i++; res = -EINVAL; break; } msgs[i].len = msgs[i].buf[0]; } } if (res < 0) { int j; for (j = 0; j < i; ++j) kfree(msgs[j].buf); kfree(data_ptrs); return res; } res = i2c_transfer(client->adapter, msgs, nmsgs); while (i-- > 0) { if (res >= 0 && (msgs[i].flags & I2C_M_RD)) { if (copy_to_user(data_ptrs[i], msgs[i].buf, msgs[i].len)) res = -EFAULT; } kfree(msgs[i].buf); } kfree(data_ptrs); return res; } static noinline int i2cdev_ioctl_smbus(struct i2c_client *client, u8 read_write, u8 command, u32 size, union i2c_smbus_data __user *data) { union i2c_smbus_data temp = {}; int datasize, res; if ((size != I2C_SMBUS_BYTE) && (size != I2C_SMBUS_QUICK) && (size != I2C_SMBUS_BYTE_DATA) && (size != I2C_SMBUS_WORD_DATA) && (size != I2C_SMBUS_PROC_CALL) && (size != I2C_SMBUS_BLOCK_DATA) && (size != I2C_SMBUS_I2C_BLOCK_BROKEN) && (size != I2C_SMBUS_I2C_BLOCK_DATA) && (size != I2C_SMBUS_BLOCK_PROC_CALL)) { dev_dbg(&client->adapter->dev, "size out of range (%x) in ioctl I2C_SMBUS.\n", size); return -EINVAL; } /* Note that I2C_SMBUS_READ and I2C_SMBUS_WRITE are 0 and 1, so the check is valid if size==I2C_SMBUS_QUICK too. */ if ((read_write != I2C_SMBUS_READ) && (read_write != I2C_SMBUS_WRITE)) { dev_dbg(&client->adapter->dev, "read_write out of range (%x) in ioctl I2C_SMBUS.\n", read_write); return -EINVAL; } /* Note that command values are always valid! 
*/ if ((size == I2C_SMBUS_QUICK) || ((size == I2C_SMBUS_BYTE) && (read_write == I2C_SMBUS_WRITE))) /* These are special: we do not use data */ return i2c_smbus_xfer(client->adapter, client->addr, client->flags, read_write, command, size, NULL); if (data == NULL) { dev_dbg(&client->adapter->dev, "data is NULL pointer in ioctl I2C_SMBUS.\n"); return -EINVAL; } if ((size == I2C_SMBUS_BYTE_DATA) || (size == I2C_SMBUS_BYTE)) datasize = sizeof(data->byte); else if ((size == I2C_SMBUS_WORD_DATA) || (size == I2C_SMBUS_PROC_CALL)) datasize = sizeof(data->word); else /* size == smbus block, i2c block, or block proc. call */ datasize = sizeof(data->block); if ((size == I2C_SMBUS_PROC_CALL) || (size == I2C_SMBUS_BLOCK_PROC_CALL) || (size == I2C_SMBUS_I2C_BLOCK_DATA) || (read_write == I2C_SMBUS_WRITE)) { if (copy_from_user(&temp, data, datasize)) return -EFAULT; } if (size == I2C_SMBUS_I2C_BLOCK_BROKEN) { /* Convert old I2C block commands to the new convention. This preserves binary compatibility. */ size = I2C_SMBUS_I2C_BLOCK_DATA; if (read_write == I2C_SMBUS_READ) temp.block[0] = I2C_SMBUS_BLOCK_MAX; } res = i2c_smbus_xfer(client->adapter, client->addr, client->flags, read_write, command, size, &temp); if (!res && ((size == I2C_SMBUS_PROC_CALL) || (size == I2C_SMBUS_BLOCK_PROC_CALL) || (read_write == I2C_SMBUS_READ))) { if (copy_to_user(data, &temp, datasize)) return -EFAULT; } return res; } static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct i2c_client *client = file->private_data; unsigned long funcs; dev_dbg(&client->adapter->dev, "ioctl, cmd=0x%02x, arg=0x%02lx\n", cmd, arg); switch (cmd) { case I2C_SLAVE: case I2C_SLAVE_FORCE: if ((arg > 0x3ff) || (((client->flags & I2C_M_TEN) == 0) && arg > 0x7f)) return -EINVAL; if (cmd == I2C_SLAVE && i2cdev_check_addr(client->adapter, arg)) return -EBUSY; /* REVISIT: address could become busy later */ client->addr = arg; return 0; case I2C_TENBIT: if (arg) client->flags |= I2C_M_TEN; else client->flags &= ~I2C_M_TEN; return 0; case I2C_PEC: /* * Setting the PEC flag here won't affect kernel drivers, * which will be using the i2c_client node registered with * the driver model core. Likewise, when that client has * the PEC flag already set, the i2c-dev driver won't see * (or use) this setting. 
*/ if (arg) client->flags |= I2C_CLIENT_PEC; else client->flags &= ~I2C_CLIENT_PEC; return 0; case I2C_FUNCS: funcs = i2c_get_functionality(client->adapter); return put_user(funcs, (unsigned long __user *)arg); case I2C_RDWR: { struct i2c_rdwr_ioctl_data rdwr_arg; struct i2c_msg *rdwr_pa; int res; if (copy_from_user(&rdwr_arg, (struct i2c_rdwr_ioctl_data __user *)arg, sizeof(rdwr_arg))) return -EFAULT; if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) return -EINVAL; /* * Put an arbitrary limit on the number of messages that can * be sent at once */ if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; rdwr_pa = memdup_array_user(rdwr_arg.msgs, rdwr_arg.nmsgs, sizeof(struct i2c_msg)); if (IS_ERR(rdwr_pa)) return PTR_ERR(rdwr_pa); res = i2cdev_ioctl_rdwr(client, rdwr_arg.nmsgs, rdwr_pa); kfree(rdwr_pa); return res; } case I2C_SMBUS: { struct i2c_smbus_ioctl_data data_arg; if (copy_from_user(&data_arg, (struct i2c_smbus_ioctl_data __user *) arg, sizeof(struct i2c_smbus_ioctl_data))) return -EFAULT; return i2cdev_ioctl_smbus(client, data_arg.read_write, data_arg.command, data_arg.size, data_arg.data); } case I2C_RETRIES: if (arg > INT_MAX) return -EINVAL; client->adapter->retries = arg; break; case I2C_TIMEOUT: if (arg > INT_MAX) return -EINVAL; /* For historical reasons, user-space sets the timeout * value in units of 10 ms. */ client->adapter->timeout = msecs_to_jiffies(arg * 10); break; default: /* NOTE: returning a fault code here could cause trouble * in buggy userspace code. Some old kernel bugs returned * zero in this case, and userspace code might accidentally * have depended on that bug. */ return -ENOTTY; } return 0; } #ifdef CONFIG_COMPAT struct i2c_smbus_ioctl_data32 { u8 read_write; u8 command; u32 size; compat_caddr_t data; /* union i2c_smbus_data *data */ }; struct i2c_msg32 { u16 addr; u16 flags; u16 len; compat_caddr_t buf; }; struct i2c_rdwr_ioctl_data32 { compat_caddr_t msgs; /* struct i2c_msg __user *msgs */ u32 nmsgs; }; static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct i2c_client *client = file->private_data; unsigned long funcs; switch (cmd) { case I2C_FUNCS: funcs = i2c_get_functionality(client->adapter); return put_user(funcs, (compat_ulong_t __user *)arg); case I2C_RDWR: { struct i2c_rdwr_ioctl_data32 rdwr_arg; struct i2c_msg32 __user *p; struct i2c_msg *rdwr_pa; int i, res; if (copy_from_user(&rdwr_arg, (struct i2c_rdwr_ioctl_data32 __user *)arg, sizeof(rdwr_arg))) return -EFAULT; if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) return -EINVAL; if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; rdwr_pa = kmalloc_array(rdwr_arg.nmsgs, sizeof(struct i2c_msg), GFP_KERNEL); if (!rdwr_pa) return -ENOMEM; p = compat_ptr(rdwr_arg.msgs); for (i = 0; i < rdwr_arg.nmsgs; i++) { struct i2c_msg32 umsg; if (copy_from_user(&umsg, p + i, sizeof(umsg))) { kfree(rdwr_pa); return -EFAULT; } rdwr_pa[i] = (struct i2c_msg) { .addr = umsg.addr, .flags = umsg.flags, .len = umsg.len, .buf = (__force __u8 *)compat_ptr(umsg.buf), }; } res = i2cdev_ioctl_rdwr(client, rdwr_arg.nmsgs, rdwr_pa); kfree(rdwr_pa); return res; } case I2C_SMBUS: { struct i2c_smbus_ioctl_data32 data32; if (copy_from_user(&data32, (void __user *) arg, sizeof(data32))) return -EFAULT; return i2cdev_ioctl_smbus(client, data32.read_write, data32.command, data32.size, compat_ptr(data32.data)); } default: return i2cdev_ioctl(file, cmd, arg); } } #else #define compat_i2cdev_ioctl NULL #endif static int i2cdev_open(struct inode *inode, struct file *file) { unsigned int 
minor = iminor(inode); struct i2c_client *client; struct i2c_adapter *adap; adap = i2c_get_adapter(minor); if (!adap) return -ENODEV; /* This creates an anonymous i2c_client, which may later be * pointed to some address using I2C_SLAVE or I2C_SLAVE_FORCE. * * This client is ** NEVER REGISTERED ** with the driver model * or I2C core code!! It just holds private copies of addressing * information and maybe a PEC flag. */ client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) { i2c_put_adapter(adap); return -ENOMEM; } snprintf(client->name, I2C_NAME_SIZE, "i2c-dev %d", adap->nr); client->adapter = adap; file->private_data = client; return 0; } static int i2cdev_release(struct inode *inode, struct file *file) { struct i2c_client *client = file->private_data; i2c_put_adapter(client->adapter); kfree(client); file->private_data = NULL; return 0; } static const struct file_operations i2cdev_fops = { .owner = THIS_MODULE, .read = i2cdev_read, .write = i2cdev_write, .unlocked_ioctl = i2cdev_ioctl, .compat_ioctl = compat_i2cdev_ioctl, .open = i2cdev_open, .release = i2cdev_release, }; /* ------------------------------------------------------------------------- */ static const struct class i2c_dev_class = { .name = "i2c-dev", .dev_groups = i2c_groups, }; static void i2cdev_dev_release(struct device *dev) { struct i2c_dev *i2c_dev; i2c_dev = container_of(dev, struct i2c_dev, dev); kfree(i2c_dev); } static int i2cdev_attach_adapter(struct device *dev) { struct i2c_adapter *adap; struct i2c_dev *i2c_dev; int res; if (dev->type != &i2c_adapter_type) return NOTIFY_DONE; adap = to_i2c_adapter(dev); i2c_dev = get_free_i2c_dev(adap); if (IS_ERR(i2c_dev)) return NOTIFY_DONE; cdev_init(&i2c_dev->cdev, &i2cdev_fops); i2c_dev->cdev.owner = THIS_MODULE; device_initialize(&i2c_dev->dev); i2c_dev->dev.devt = MKDEV(I2C_MAJOR, adap->nr); i2c_dev->dev.class = &i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); if (res) goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); if (res) goto err_put_i2c_dev; pr_debug("adapter [%s] registered as minor %d\n", adap->name, adap->nr); return NOTIFY_OK; err_put_i2c_dev: put_i2c_dev(i2c_dev, false); return NOTIFY_DONE; } static int i2cdev_detach_adapter(struct device *dev) { struct i2c_adapter *adap; struct i2c_dev *i2c_dev; if (dev->type != &i2c_adapter_type) return NOTIFY_DONE; adap = to_i2c_adapter(dev); i2c_dev = i2c_dev_get_by_minor(adap->nr); if (!i2c_dev) /* attach_adapter must have failed */ return NOTIFY_DONE; put_i2c_dev(i2c_dev, true); pr_debug("adapter [%s] unregistered\n", adap->name); return NOTIFY_OK; } static int i2cdev_notifier_call(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; switch (action) { case BUS_NOTIFY_ADD_DEVICE: return i2cdev_attach_adapter(dev); case BUS_NOTIFY_DEL_DEVICE: return i2cdev_detach_adapter(dev); } return NOTIFY_DONE; } static struct notifier_block i2cdev_notifier = { .notifier_call = i2cdev_notifier_call, }; /* ------------------------------------------------------------------------- */ static int __init i2c_dev_attach_adapter(struct device *dev, void *dummy) { i2cdev_attach_adapter(dev); return 0; } static int __exit i2c_dev_detach_adapter(struct device *dev, void *dummy) { i2cdev_detach_adapter(dev); return 0; } /* * module load/unload record keeping */ static int __init i2c_dev_init(void) { int res; pr_info("i2c /dev entries driver\n"); res = 
register_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS, "i2c"); if (res) goto out; res = class_register(&i2c_dev_class); if (res) goto out_unreg_chrdev; /* Keep track of adapters which will be added or removed later */ res = bus_register_notifier(&i2c_bus_type, &i2cdev_notifier); if (res) goto out_unreg_class; /* Bind to already existing adapters right away */ i2c_for_each_dev(NULL, i2c_dev_attach_adapter); return 0; out_unreg_class: class_unregister(&i2c_dev_class); out_unreg_chrdev: unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS); out: pr_err("Driver Initialisation failed\n"); return res; } static void __exit i2c_dev_exit(void) { bus_unregister_notifier(&i2c_bus_type, &i2cdev_notifier); i2c_for_each_dev(NULL, i2c_dev_detach_adapter); class_unregister(&i2c_dev_class); unregister_chrdev_region(MKDEV(I2C_MAJOR, 0), I2C_MINORS); } MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"); MODULE_AUTHOR("Simon G. Vogl <simon@tk.uni-linz.ac.at>"); MODULE_DESCRIPTION("I2C /dev entries driver"); MODULE_LICENSE("GPL"); module_init(i2c_dev_init); module_exit(i2c_dev_exit); |
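/*
 * Illustrative userspace sketch of the I2C_RDWR path handled by
 * i2cdev_ioctl_rdwr() above: one write message selecting a register, followed
 * by one read message, combined into a single repeated-start transaction.
 * The bus node /dev/i2c-1, the 7-bit slave address 0x50 and the register
 * offset are assumptions for the example, not values mandated by the driver.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>

int main(void)
{
	uint8_t reg = 0x00, val;
	struct i2c_msg msgs[2] = {
		{ .addr = 0x50, .flags = 0,        .len = 1, .buf = &reg },
		{ .addr = 0x50, .flags = I2C_M_RD, .len = 1, .buf = &val },
	};
	struct i2c_rdwr_ioctl_data xfer = { .msgs = msgs, .nmsgs = 2 };
	int fd = open("/dev/i2c-1", O_RDWR);	/* assumed adapter */

	if (fd < 0)
		return 1;
	/* No I2C_SLAVE ioctl needed: each i2c_msg carries its own address */
	if (ioctl(fd, I2C_RDWR, &xfer) < 0) {
		perror("I2C_RDWR");
		close(fd);
		return 1;
	}
	printf("reg 0x%02x = 0x%02x\n", reg, val);
	close(fd);
	return 0;
}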
/* Key type used to cache DNS lookups made by the kernel * * See Documentation/networking/dns_resolver.rst * * Copyright (c) 2007 Igor Mammedov * Author(s): Igor Mammedov (niallain@gmail.com) * Steve French (sfrench@us.ibm.com) * Wang Lei (wang840925@gmail.com) * David Howells (dhowells@redhat.com) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/keyctl.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/dns_resolver.h> #include <keys/dns_resolver-type.h> #include <keys/user-type.h> #include "internal.h" MODULE_DESCRIPTION("DNS Resolver"); MODULE_AUTHOR("Wang Lei"); MODULE_LICENSE("GPL"); unsigned int dns_resolver_debug; module_param_named(debug, dns_resolver_debug, uint, 0644); MODULE_PARM_DESC(debug, "DNS Resolver debugging mask"); const struct cred *dns_resolver_cache; #define DNS_ERRORNO_OPTION "dnserror" /* * Preparse instantiation data for a dns_resolver key. * * For normal hostname lookups, the data must be a NUL-terminated string, with * the NUL char accounted in datalen. * * If the data contains '#' characters, then we take the clause after each * one to be an option of the form 'key=value'. The actual data of interest is * the string leading up to the first '#'.
For instance: * * "ip1,ip2,...#foo=bar" * * For server list requests, the data must begin with a NUL char and be * followed by a byte indicating the version of the data format. Version 1 * looks something like (note this is packed): * * u8 Non-string marker (ie. 0) * u8 Content (DNS_PAYLOAD_IS_*) * u8 Version (e.g. 1) * u8 Source of server list * u8 Lookup status of server list * u8 Number of servers * foreach-server { * __le16 Name length * __le16 Priority (as per SRV record, low first) * __le16 Weight (as per SRV record, higher first) * __le16 Port * u8 Source of address list * u8 Lookup status of address list * u8 Protocol (DNS_SERVER_PROTOCOL_*) * u8 Number of addresses * char[] Name (not NUL-terminated) * foreach-address { * u8 Family (DNS_ADDRESS_IS_*) * union { * u8[4] ipv4_addr * u8[16] ipv6_addr * } * } * } * */ static int dns_resolver_preparse(struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; int datalen = prep->datalen, result_len = 0; const char *data = prep->data, *end, *opt; if (datalen <= 1 || !data) return -EINVAL; if (data[0] == 0) { const struct dns_server_list_v1_header *v1; /* It may be a server list. */ if (datalen < sizeof(*v1)) return -EINVAL; v1 = (const struct dns_server_list_v1_header *)data; kenter("[%u,%u],%u", v1->hdr.content, v1->hdr.version, datalen); if (v1->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST) { pr_warn_ratelimited( "dns_resolver: Unsupported content type (%u)\n", v1->hdr.content); return -EINVAL; } if (v1->hdr.version != 1) { pr_warn_ratelimited( "dns_resolver: Unsupported server list version (%u)\n", v1->hdr.version); return -EINVAL; } if ((v1->status != DNS_LOOKUP_GOOD && v1->status != DNS_LOOKUP_GOOD_WITH_BAD)) { if (prep->expiry == TIME64_MAX) prep->expiry = ktime_get_real_seconds() + 1; } result_len = datalen; goto store_result; } kenter("'%*.*s',%u", datalen, datalen, data, datalen); if (!data || data[datalen - 1] != '\0') return -EINVAL; datalen--; /* deal with any options embedded in the data */ end = data + datalen; opt = memchr(data, '#', datalen); if (!opt) { /* no options: the entire data is the result */ kdebug("no options"); result_len = datalen; } else { const char *next_opt; result_len = opt - data; opt++; kdebug("options: '%s'", opt); do { int opt_len, opt_nlen; const char *eq; char optval[128]; next_opt = memchr(opt, '#', end - opt) ?: end; opt_len = next_opt - opt; if (opt_len <= 0 || opt_len > sizeof(optval)) { pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n", opt_len); return -EINVAL; } eq = memchr(opt, '=', opt_len); if (eq) { opt_nlen = eq - opt; eq++; memcpy(optval, eq, next_opt - eq); optval[next_opt - eq] = '\0'; } else { opt_nlen = opt_len; optval[0] = '\0'; } kdebug("option '%*.*s' val '%s'", opt_nlen, opt_nlen, opt, optval); /* see if it's an error number representing a DNS error * that's to be recorded as the result in this key */ if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 && memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) { kdebug("dns error number option"); ret = kstrtoul(optval, 10, &derrno); if (ret < 0) goto bad_option_value; if (derrno < 1 || derrno > 511) goto bad_option_value; kdebug("dns error no. 
= %lu", derrno); prep->payload.data[dns_key_error] = ERR_PTR(-derrno); continue; } bad_option_value: pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n", opt_nlen, opt_nlen, opt); return -EINVAL; } while (opt = next_opt + 1, opt < end); } /* don't cache the result if we're caching an error saying there's no * result */ if (prep->payload.data[dns_key_error]) { kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error])); return 0; } store_result: kdebug("store result"); prep->quotalen = result_len; upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); if (!upayload) { kleave(" = -ENOMEM"); return -ENOMEM; } upayload->datalen = result_len; memcpy(upayload->data, data, result_len); upayload->data[result_len] = '\0'; prep->payload.data[dns_key_data] = upayload; kleave(" = 0"); return 0; } /* * Clean up the preparse data */ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) { pr_devel("==>%s()\n", __func__); kfree(prep->payload.data[dns_key_data]); } /* * The description is of the form "[<type>:]<domain_name>" * * The domain name may be a simple name or an absolute domain name (which * should end with a period). The domain name is case-independent. */ static bool dns_resolver_cmp(const struct key *key, const struct key_match_data *match_data) { int slen, dlen, ret = 0; const char *src = key->description, *dsp = match_data->raw_data; kenter("%s,%s", src, dsp); if (!src || !dsp) goto no_match; if (strcasecmp(src, dsp) == 0) goto matched; slen = strlen(src); dlen = strlen(dsp); if (slen <= 0 || dlen <= 0) goto no_match; if (src[slen - 1] == '.') slen--; if (dsp[dlen - 1] == '.') dlen--; if (slen != dlen || strncasecmp(src, dsp, slen) != 0) goto no_match; matched: ret = 1; no_match: kleave(" = %d", ret); return ret; } /* * Preparse the match criterion. */ static int dns_resolver_match_preparse(struct key_match_data *match_data) { match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE; match_data->cmp = dns_resolver_cmp; return 0; } /* * Describe a DNS key */ static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) seq_printf(m, ": %d", err); else seq_printf(m, ": %u", key->datalen); } } /* * read the DNS data * - the key's semaphore is read-locked */ static long dns_resolver_read(const struct key *key, char *buffer, size_t buflen) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) return err; return user_read(key, buffer, buflen); } struct key_type key_type_dns_resolver = { .name = "dns_resolver", .flags = KEY_TYPE_NET_DOMAIN | KEY_TYPE_INSTANT_REAP, .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, .match_preparse = dns_resolver_match_preparse, .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, .read = dns_resolver_read, }; static int __init init_dns_resolver(void) { struct cred *cred; struct key *keyring; int ret; /* create an override credential set with a special thread keyring in * which DNS requests are cached * * this is used to prevent malicious redirections from being installed * with add_key(). 
*/ cred = prepare_kernel_cred(&init_task); if (!cred) return -ENOMEM; keyring = keyring_alloc(".dns_resolver", GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto failed_put_cred; } ret = register_key_type(&key_type_dns_resolver); if (ret < 0) goto failed_put_key; /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; dns_resolver_cache = cred; kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); return 0; failed_put_key: key_put(keyring); failed_put_cred: put_cred(cred); return ret; } static void __exit exit_dns_resolver(void) { key_revoke(dns_resolver_cache->thread_keyring); unregister_key_type(&key_type_dns_resolver); put_cred(dns_resolver_cache); } module_init(init_dns_resolver) module_exit(exit_dns_resolver) MODULE_LICENSE("GPL"); |
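/*
 * Illustrative userspace sketch (assumes the libkeyutils library and a
 * configured /sbin/request-key helper): request a "dns_resolver" key for a
 * hostname and read back the payload stored by dns_resolver_preparse(), i.e.
 * the address list that precedes any '#option=value' clauses.  The hostname
 * default and destination keyring are assumptions for the example.
 * Build with: cc -o dnskey dnskey.c -lkeyutils
 */
#include <keyutils.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
	const char *host = argc > 1 ? argv[1] : "example.com";
	key_serial_t key;
	void *payload;
	long len;

	/* Triggers the upcall; the result is cached in the key's payload */
	key = request_key("dns_resolver", host, NULL, KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("request_key");
		return 1;
	}

	/* A cached failure comes back here as the recorded dnserror value */
	len = keyctl_read_alloc(key, &payload);
	if (len < 0) {
		perror("keyctl_read_alloc");
		return 1;
	}
	printf("%s -> %.*s\n", host, (int)len, (char *)payload);
	free(payload);
	return 0;
}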
// SPDX-License-Identifier: GPL-2.0-or-later /* * corsair-cpro.c - Linux driver for Corsair Commander Pro * Copyright (C) 2020 Marius Zachmann <mail@mariuszachmann.de> * * This driver uses hid reports to communicate with the device to allow hidraw userspace drivers * to still be used. The device does not use report ids. When using hidraw and this driver * simultaneously, reports could be switched.
*/ #include <linux/bitops.h> #include <linux/completion.h> #include <linux/debugfs.h> #include <linux/hid.h> #include <linux/hwmon.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_PRODUCT_ID_CORSAIR_COMMANDERPRO 0x0c10 #define USB_PRODUCT_ID_CORSAIR_1000D 0x1d00 #define OUT_BUFFER_SIZE 63 #define IN_BUFFER_SIZE 16 #define LABEL_LENGTH 11 #define REQ_TIMEOUT 300 #define CTL_GET_FW_VER 0x02 /* returns the firmware version in bytes 1-3 */ #define CTL_GET_BL_VER 0x06 /* returns the bootloader version in bytes 1-2 */ #define CTL_GET_TMP_CNCT 0x10 /* * returns in bytes 1-4 for each temp sensor: * 0 not connected * 1 connected */ #define CTL_GET_TMP 0x11 /* * send: byte 1 is channel, rest zero * rcv: returns temp for channel in centi-degree celsius * in bytes 1 and 2 * returns 0x11 in byte 0 if no sensor is connected */ #define CTL_GET_VOLT 0x12 /* * send: byte 1 is rail number: 0 = 12v, 1 = 5v, 2 = 3.3v * rcv: returns millivolt in bytes 1,2 * returns error 0x10 if request is invalid */ #define CTL_GET_FAN_CNCT 0x20 /* * returns in bytes 1-6 for each fan: * 0 not connected * 1 3pin * 2 4pin */ #define CTL_GET_FAN_RPM 0x21 /* * send: byte 1 is channel, rest zero * rcv: returns rpm in bytes 1,2 */ #define CTL_GET_FAN_PWM 0x22 /* * send: byte 1 is channel, rest zero * rcv: returns pwm in byte 1 if it was set * returns error 0x12 if fan is controlled via * fan_target or fan curve */ #define CTL_SET_FAN_FPWM 0x23 /* * set fixed pwm * send: byte 1 is fan number * send: byte 2 is percentage from 0 - 100 */ #define CTL_SET_FAN_TARGET 0x24 /* * set target rpm * send: byte 1 is fan number * send: byte 2-3 is target * device accepts all values from 0x00 - 0xFFFF */ #define NUM_FANS 6 #define NUM_TEMP_SENSORS 4 struct ccp_device { struct hid_device *hdev; struct device *hwmon_dev; struct dentry *debugfs; /* For reinitializing the completion below */ spinlock_t wait_input_report_lock; struct completion wait_input_report; struct mutex mutex; /* whenever buffer is used, lock before send_usb_cmd */ u8 *cmd_buffer; u8 *buffer; int target[6]; DECLARE_BITMAP(temp_cnct, NUM_TEMP_SENSORS); DECLARE_BITMAP(fan_cnct, NUM_FANS); char fan_label[6][LABEL_LENGTH]; u8 firmware_ver[3]; u8 bootloader_ver[2]; }; /* converts response error in buffer to errno */ static int ccp_get_errno(struct ccp_device *ccp) { switch (ccp->buffer[0]) { case 0x00: /* success */ return 0; case 0x01: /* called invalid command */ return -EOPNOTSUPP; case 0x10: /* called GET_VOLT / GET_TMP with invalid arguments */ return -EINVAL; case 0x11: /* requested temps of disconnected sensors */ case 0x12: /* requested pwm of not pwm controlled channels */ return -ENODATA; default: hid_dbg(ccp->hdev, "unknown device response error: %d", ccp->buffer[0]); return -EIO; } } /* send command, check for error in response, response in ccp->buffer */ static int send_usb_cmd(struct ccp_device *ccp, u8 command, u8 byte1, u8 byte2, u8 byte3) { unsigned long t; int ret; memset(ccp->cmd_buffer, 0x00, OUT_BUFFER_SIZE); ccp->cmd_buffer[0] = command; ccp->cmd_buffer[1] = byte1; ccp->cmd_buffer[2] = byte2; ccp->cmd_buffer[3] = byte3; /* * Disable raw event parsing for a moment to safely reinitialize the * completion. Reinit is done because hidraw could have triggered * the raw event parsing and marked the ccp->wait_input_report * completion as done. 
*/ spin_lock_bh(&ccp->wait_input_report_lock); reinit_completion(&ccp->wait_input_report); spin_unlock_bh(&ccp->wait_input_report_lock); ret = hid_hw_output_report(ccp->hdev, ccp->cmd_buffer, OUT_BUFFER_SIZE); if (ret < 0) return ret; t = wait_for_completion_timeout(&ccp->wait_input_report, msecs_to_jiffies(REQ_TIMEOUT)); if (!t) return -ETIMEDOUT; return ccp_get_errno(ccp); } static int ccp_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct ccp_device *ccp = hid_get_drvdata(hdev); /* only copy buffer when requested */ spin_lock(&ccp->wait_input_report_lock); if (!completion_done(&ccp->wait_input_report)) { memcpy(ccp->buffer, data, min(IN_BUFFER_SIZE, size)); complete_all(&ccp->wait_input_report); } spin_unlock(&ccp->wait_input_report_lock); return 0; } /* requests and returns single data values depending on channel */ static int get_data(struct ccp_device *ccp, int command, int channel, bool two_byte_data) { int ret; mutex_lock(&ccp->mutex); ret = send_usb_cmd(ccp, command, channel, 0, 0); if (ret) goto out_unlock; ret = ccp->buffer[1]; if (two_byte_data) ret = (ret << 8) + ccp->buffer[2]; out_unlock: mutex_unlock(&ccp->mutex); return ret; } static int set_pwm(struct ccp_device *ccp, int channel, long val) { int ret; if (val < 0 || val > 255) return -EINVAL; /* The Corsair Commander Pro uses values from 0-100 */ val = DIV_ROUND_CLOSEST(val * 100, 255); mutex_lock(&ccp->mutex); ret = send_usb_cmd(ccp, CTL_SET_FAN_FPWM, channel, val, 0); if (!ret) ccp->target[channel] = -ENODATA; mutex_unlock(&ccp->mutex); return ret; } static int set_target(struct ccp_device *ccp, int channel, long val) { int ret; val = clamp_val(val, 0, 0xFFFF); ccp->target[channel] = val; mutex_lock(&ccp->mutex); ret = send_usb_cmd(ccp, CTL_SET_FAN_TARGET, channel, val >> 8, val); mutex_unlock(&ccp->mutex); return ret; } static int ccp_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { struct ccp_device *ccp = dev_get_drvdata(dev); switch (type) { case hwmon_fan: switch (attr) { case hwmon_fan_label: *str = ccp->fan_label[channel]; return 0; default: break; } break; default: break; } return -EOPNOTSUPP; } static int ccp_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct ccp_device *ccp = dev_get_drvdata(dev); int ret; switch (type) { case hwmon_temp: switch (attr) { case hwmon_temp_input: ret = get_data(ccp, CTL_GET_TMP, channel, true); if (ret < 0) return ret; *val = ret * 10; return 0; default: break; } break; case hwmon_fan: switch (attr) { case hwmon_fan_input: ret = get_data(ccp, CTL_GET_FAN_RPM, channel, true); if (ret < 0) return ret; *val = ret; return 0; case hwmon_fan_target: /* how to read target values from the device is unknown */ /* driver returns last set value or 0 */ if (ccp->target[channel] < 0) return -ENODATA; *val = ccp->target[channel]; return 0; default: break; } break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: ret = get_data(ccp, CTL_GET_FAN_PWM, channel, false); if (ret < 0) return ret; *val = DIV_ROUND_CLOSEST(ret * 255, 100); return 0; default: break; } break; case hwmon_in: switch (attr) { case hwmon_in_input: ret = get_data(ccp, CTL_GET_VOLT, channel, true); if (ret < 0) return ret; *val = ret; return 0; default: break; } break; default: break; } return -EOPNOTSUPP; }; static int ccp_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val) { struct ccp_device *ccp = dev_get_drvdata(dev); switch (type) { case 
hwmon_pwm: switch (attr) { case hwmon_pwm_input: return set_pwm(ccp, channel, val); default: break; } break; case hwmon_fan: switch (attr) { case hwmon_fan_target: return set_target(ccp, channel, val); default: break; } break; default: break; } return -EOPNOTSUPP; }; static umode_t ccp_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { const struct ccp_device *ccp = data; switch (type) { case hwmon_temp: if (!test_bit(channel, ccp->temp_cnct)) break; switch (attr) { case hwmon_temp_input: return 0444; case hwmon_temp_label: return 0444; default: break; } break; case hwmon_fan: if (!test_bit(channel, ccp->fan_cnct)) break; switch (attr) { case hwmon_fan_input: return 0444; case hwmon_fan_label: return 0444; case hwmon_fan_target: return 0644; default: break; } break; case hwmon_pwm: if (!test_bit(channel, ccp->fan_cnct)) break; switch (attr) { case hwmon_pwm_input: return 0644; default: break; } break; case hwmon_in: switch (attr) { case hwmon_in_input: return 0444; default: break; } break; default: break; } return 0; }; static const struct hwmon_ops ccp_hwmon_ops = { .is_visible = ccp_is_visible, .read = ccp_read, .read_string = ccp_read_string, .write = ccp_write, }; static const struct hwmon_channel_info * const ccp_info[] = { HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ), HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT, HWMON_T_INPUT, HWMON_T_INPUT, HWMON_T_INPUT ), HWMON_CHANNEL_INFO(fan, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET, HWMON_F_INPUT | HWMON_F_LABEL | HWMON_F_TARGET ), HWMON_CHANNEL_INFO(pwm, HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT ), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT, HWMON_I_INPUT, HWMON_I_INPUT ), NULL }; static const struct hwmon_chip_info ccp_chip_info = { .ops = &ccp_hwmon_ops, .info = ccp_info, }; /* read fan connection status and set labels */ static int get_fan_cnct(struct ccp_device *ccp) { int channel; int mode; int ret; ret = send_usb_cmd(ccp, CTL_GET_FAN_CNCT, 0, 0, 0); if (ret) return ret; for (channel = 0; channel < NUM_FANS; channel++) { mode = ccp->buffer[channel + 1]; if (mode == 0) continue; set_bit(channel, ccp->fan_cnct); ccp->target[channel] = -ENODATA; switch (mode) { case 1: scnprintf(ccp->fan_label[channel], LABEL_LENGTH, "fan%d 3pin", channel + 1); break; case 2: scnprintf(ccp->fan_label[channel], LABEL_LENGTH, "fan%d 4pin", channel + 1); break; default: scnprintf(ccp->fan_label[channel], LABEL_LENGTH, "fan%d other", channel + 1); break; } } return 0; } /* read temp sensor connection status */ static int get_temp_cnct(struct ccp_device *ccp) { int channel; int mode; int ret; ret = send_usb_cmd(ccp, CTL_GET_TMP_CNCT, 0, 0, 0); if (ret) return ret; for (channel = 0; channel < NUM_TEMP_SENSORS; channel++) { mode = ccp->buffer[channel + 1]; if (mode == 0) continue; set_bit(channel, ccp->temp_cnct); } return 0; } /* read firmware version */ static int get_fw_version(struct ccp_device *ccp) { int ret; ret = send_usb_cmd(ccp, CTL_GET_FW_VER, 0, 0, 0); if (ret) { hid_notice(ccp->hdev, "Failed to read firmware version.\n"); return ret; } ccp->firmware_ver[0] = ccp->buffer[1]; ccp->firmware_ver[1] = ccp->buffer[2]; ccp->firmware_ver[2] = ccp->buffer[3]; return 0; } /* read bootloader version */ static int get_bl_version(struct ccp_device *ccp) { int ret; ret = send_usb_cmd(ccp, 
CTL_GET_BL_VER, 0, 0, 0); if (ret) { hid_notice(ccp->hdev, "Failed to read bootloader version.\n"); return ret; } ccp->bootloader_ver[0] = ccp->buffer[1]; ccp->bootloader_ver[1] = ccp->buffer[2]; return 0; } static int firmware_show(struct seq_file *seqf, void *unused) { struct ccp_device *ccp = seqf->private; seq_printf(seqf, "%d.%d.%d\n", ccp->firmware_ver[0], ccp->firmware_ver[1], ccp->firmware_ver[2]); return 0; } DEFINE_SHOW_ATTRIBUTE(firmware); static int bootloader_show(struct seq_file *seqf, void *unused) { struct ccp_device *ccp = seqf->private; seq_printf(seqf, "%d.%d\n", ccp->bootloader_ver[0], ccp->bootloader_ver[1]); return 0; } DEFINE_SHOW_ATTRIBUTE(bootloader); static void ccp_debugfs_init(struct ccp_device *ccp) { char name[32]; int ret; scnprintf(name, sizeof(name), "corsaircpro-%s", dev_name(&ccp->hdev->dev)); ccp->debugfs = debugfs_create_dir(name, NULL); ret = get_fw_version(ccp); if (!ret) debugfs_create_file("firmware_version", 0444, ccp->debugfs, ccp, &firmware_fops); ret = get_bl_version(ccp); if (!ret) debugfs_create_file("bootloader_version", 0444, ccp->debugfs, ccp, &bootloader_fops); } static int ccp_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct ccp_device *ccp; int ret; ccp = devm_kzalloc(&hdev->dev, sizeof(*ccp), GFP_KERNEL); if (!ccp) return -ENOMEM; ccp->cmd_buffer = devm_kmalloc(&hdev->dev, OUT_BUFFER_SIZE, GFP_KERNEL); if (!ccp->cmd_buffer) return -ENOMEM; ccp->buffer = devm_kmalloc(&hdev->dev, IN_BUFFER_SIZE, GFP_KERNEL); if (!ccp->buffer) return -ENOMEM; ret = hid_parse(hdev); if (ret) return ret; ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); if (ret) return ret; ret = hid_hw_open(hdev); if (ret) goto out_hw_stop; ccp->hdev = hdev; hid_set_drvdata(hdev, ccp); mutex_init(&ccp->mutex); spin_lock_init(&ccp->wait_input_report_lock); init_completion(&ccp->wait_input_report); hid_device_io_start(hdev); /* temp and fan connection status only updates when device is powered on */ ret = get_temp_cnct(ccp); if (ret) goto out_hw_close; ret = get_fan_cnct(ccp); if (ret) goto out_hw_close; ccp_debugfs_init(ccp); ccp->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "corsaircpro", ccp, &ccp_chip_info, NULL); if (IS_ERR(ccp->hwmon_dev)) { ret = PTR_ERR(ccp->hwmon_dev); goto out_hw_close; } return 0; out_hw_close: hid_hw_close(hdev); out_hw_stop: hid_hw_stop(hdev); return ret; } static void ccp_remove(struct hid_device *hdev) { struct ccp_device *ccp = hid_get_drvdata(hdev); debugfs_remove_recursive(ccp->debugfs); hwmon_device_unregister(ccp->hwmon_dev); hid_hw_close(hdev); hid_hw_stop(hdev); } static const struct hid_device_id ccp_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_PRODUCT_ID_CORSAIR_COMMANDERPRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_PRODUCT_ID_CORSAIR_1000D) }, { } }; static struct hid_driver ccp_driver = { .name = "corsair-cpro", .id_table = ccp_devices, .probe = ccp_probe, .remove = ccp_remove, .raw_event = ccp_raw_event, }; MODULE_DEVICE_TABLE(hid, ccp_devices); MODULE_DESCRIPTION("Corsair Commander Pro controller driver"); MODULE_LICENSE("GPL"); static int __init ccp_init(void) { return hid_register_driver(&ccp_driver); } static void __exit ccp_exit(void) { hid_unregister_driver(&ccp_driver); } /* * When compiling this driver as built-in, hwmon initcalls will get called before the * hid driver and this driver would fail to register. late_initcall solves this. */ late_initcall(ccp_init); module_exit(ccp_exit); |
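/*
 * Usage sketch (not part of the driver): once this driver has bound, its
 * sensors appear through the standard hwmon sysfs interface.  The snippet
 * below is a minimal user-space illustration under the assumption that the
 * device registered as hwmon3 (the index varies per system); the attribute
 * names fan1_input and pwm1 follow the generic hwmon naming for the fan and
 * pwm channels declared in ccp_info above.  Writing 128 requests roughly 50%
 * duty; set_pwm() rescales the 0-255 sysfs value to the device's 0-100 range.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f;
	long rpm = -1;

	/* read the current speed of the first connected fan header */
	f = fopen("/sys/class/hwmon/hwmon3/fan1_input", "r"); /* hypothetical index */
	if (!f) {
		perror("fan1_input");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%ld", &rpm) != 1)
		rpm = -1;
	fclose(f);
	printf("fan1: %ld RPM\n", rpm);

	/* request roughly half duty cycle on the same header */
	f = fopen("/sys/class/hwmon/hwmon3/pwm1", "w");
	if (!f) {
		perror("pwm1");
		return EXIT_FAILURE;
	}
	fprintf(f, "128\n");
	fclose(f);
	return EXIT_SUCCESS;
}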
8 3 5 8 8 5 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs directory ops for the 9P2000 protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uio.h> #include <linux/fscache.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" /** * struct p9_rdir - readdir accounting * @head: start offset of current dirread buffer * @tail: end offset of current dirread buffer * @buf: dirread buffer * * private structure for keeping track of readdir * allocated on demand */ struct p9_rdir { int head; int tail; uint8_t buf[]; }; /** * dt_type - return file type * @mistat: mistat structure * */ static inline int dt_type(struct p9_wstat *mistat) { unsigned long perm = mistat->mode; int rettype = DT_REG; if (perm & P9_DMDIR) rettype = DT_DIR; if (perm & P9_DMSYMLINK) rettype = DT_LNK; return rettype; } /** * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir * @filp: opened file structure * @buflen: Length in bytes of buffer to allocate * */ static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) { struct p9_fid *fid = filp->private_data; if (!fid->rdir) fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); return fid->rdir; } /** * v9fs_dir_readdir - iterate through a directory * @file: opened file structure * @ctx: actor we feed the entries to * */ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) { bool over; struct p9_wstat st; int err = 0; struct p9_fid *fid; int buflen; struct p9_rdir *rdir; struct kvec kvec; p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; rdir = v9fs_alloc_rdir_buf(file, buflen); if (!rdir) return -ENOMEM; kvec.iov_base = rdir->buf; kvec.iov_len = buflen; while (1) { if (rdir->tail == rdir->head) { struct iov_iter to; int n; iov_iter_kvec(&to, ITER_DEST, &kvec, 1, buflen); n = p9_client_read(file->private_data, ctx->pos, &to, &err); if (err) return err; if (n == 0) return 0; rdir->head = 0; rdir->tail = n; } while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); if (err <= 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } over = !dir_emit(ctx, st.name, strlen(st.name), QID2INO(&st.qid), dt_type(&st)); p9stat_free(&st); if 
(over) return 0; rdir->head += err; ctx->pos += err; } } } /** * v9fs_dir_readdir_dotl - iterate through a directory * @file: opened file structure * @ctx: actor we feed the entries to * */ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) { int err = 0; struct p9_fid *fid; int buflen; struct p9_rdir *rdir; struct p9_dirent curdirent; p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_READDIRHDRSZ; rdir = v9fs_alloc_rdir_buf(file, buflen); if (!rdir) return -ENOMEM; while (1) { if (rdir->tail == rdir->head) { err = p9_client_readdir(fid, rdir->buf, buflen, ctx->pos); if (err <= 0) return err; rdir->head = 0; rdir->tail = err; } while (rdir->head < rdir->tail) { err = p9dirent_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &curdirent); if (err < 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } if (!dir_emit(ctx, curdirent.d_name, strlen(curdirent.d_name), QID2INO(&curdirent.qid), curdirent.d_type)) return 0; ctx->pos = curdirent.d_off; rdir->head += err; } } } /** * v9fs_dir_release - close a directory or a file * @inode: inode of the directory or file * @filp: file pointer to a directory or file * */ int v9fs_dir_release(struct inode *inode, struct file *filp) { struct v9fs_inode *v9inode = V9FS_I(inode); struct p9_fid *fid; __le32 version; loff_t i_size; int retval = 0, put_err; fid = filp->private_data; p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, fid ? fid->fid : -1); if (fid) { if ((S_ISREG(inode->i_mode)) && (filp->f_mode & FMODE_WRITE)) retval = filemap_fdatawrite(inode->i_mapping); spin_lock(&inode->i_lock); hlist_del(&fid->ilist); spin_unlock(&inode->i_lock); put_err = p9_fid_put(fid); retval = retval < 0 ? retval : put_err; } if ((filp->f_mode & FMODE_WRITE)) { version = cpu_to_le32(v9inode->qid.version); i_size = i_size_read(inode); fscache_unuse_cookie(v9fs_inode_cookie(v9inode), &version, &i_size); } else { fscache_unuse_cookie(v9fs_inode_cookie(v9inode), NULL, NULL); } return retval; } const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, .iterate_shared = v9fs_dir_readdir, .open = v9fs_file_open, .release = v9fs_dir_release, }; const struct file_operations v9fs_dir_operations_dotl = { .read = generic_read_dir, .llseek = generic_file_llseek, .iterate_shared = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, .fsync = v9fs_file_fsync_dotl, }; |
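/*
 * Illustration (not kernel code): both readdir implementations above share a
 * fill/drain pattern -- refill rdir->buf whenever head == tail, then unpack
 * and emit entries until head catches up with tail.  The user-space sketch
 * below mirrors only that control flow; fetch_chunk() and parse_entry() are
 * invented stand-ins for p9_client_readdir() and p9dirent_read().
 */
#include <stdio.h>
#include <string.h>

#define BUFLEN 64

/* pretend "server": returns a chunk of NUL-terminated names, 0 when done */
static int fetch_chunk(char *buf, int buflen, int *pos)
{
	static const char data[] = "alpha\0beta\0gamma\0delta\0";
	int n = (int)sizeof(data) - 1 - *pos;

	if (n <= 0)
		return 0;
	if (n > buflen)
		n = buflen;
	memcpy(buf, data + *pos, n);
	*pos += n;
	return n;
}

/* pretend entry decoder: consume one name, return bytes consumed */
static int parse_entry(const char *buf, int len, const char **name)
{
	*name = buf;
	return (int)strnlen(buf, len) + 1;
}

int main(void)
{
	char buf[BUFLEN];
	int head = 0, tail = 0, pos = 0;
	const char *name;

	for (;;) {
		if (head == tail) {		/* buffer drained: refill */
			int n = fetch_chunk(buf, BUFLEN, &pos);

			if (n <= 0)
				return 0;	/* end of directory */
			head = 0;
			tail = n;
		}
		while (head < tail) {		/* drain: emit each entry */
			head += parse_entry(buf + head, tail - head, &name);
			printf("entry: %s\n", name);
		}
	}
}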
858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/time.h> #include <linux/mutex.h> #include <linux/device.h> #include <linux/nospec.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> #include <sound/timer.h> #include <sound/control.h> #include <sound/info.h> #include "pcm_local.h" MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Abramo Bagnara <abramo@alsa-project.org>"); MODULE_DESCRIPTION("Midlevel PCM code for ALSA."); MODULE_LICENSE("GPL"); static LIST_HEAD(snd_pcm_devices); static DEFINE_MUTEX(register_mutex); #if IS_ENABLED(CONFIG_SND_PCM_OSS) static LIST_HEAD(snd_pcm_notify_list); #endif static int snd_pcm_free(struct snd_pcm *pcm); static int snd_pcm_dev_free(struct snd_device *device); static int snd_pcm_dev_register(struct snd_device *device); static int snd_pcm_dev_disconnect(struct snd_device *device); static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device) { struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == card && pcm->device == device) return pcm; } return NULL; } static int snd_pcm_next(struct snd_card *card, int device) { struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == card && pcm->device > device) return pcm->device; else if (pcm->card->number > card->number) return -1; } return -1; } static int snd_pcm_add(struct snd_pcm *newpcm) { struct snd_pcm *pcm; if (newpcm->internal) return 0; list_for_each_entry(pcm, &snd_pcm_devices, list) { if (pcm->card == newpcm->card && pcm->device == newpcm->device) return -EBUSY; if (pcm->card->number > newpcm->card->number || 
(pcm->card == newpcm->card && pcm->device > newpcm->device)) { list_add(&newpcm->list, pcm->list.prev); return 0; } } list_add_tail(&newpcm->list, &snd_pcm_devices); return 0; } static int snd_pcm_control_ioctl(struct snd_card *card, struct snd_ctl_file *control, unsigned int cmd, unsigned long arg) { switch (cmd) { case SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE: { int device; if (get_user(device, (int __user *)arg)) return -EFAULT; scoped_guard(mutex, ®ister_mutex) device = snd_pcm_next(card, device); if (put_user(device, (int __user *)arg)) return -EFAULT; return 0; } case SNDRV_CTL_IOCTL_PCM_INFO: { struct snd_pcm_info __user *info; unsigned int device, subdevice; int stream; struct snd_pcm *pcm; struct snd_pcm_str *pstr; struct snd_pcm_substream *substream; info = (struct snd_pcm_info __user *)arg; if (get_user(device, &info->device)) return -EFAULT; if (get_user(stream, &info->stream)) return -EFAULT; if (stream < 0 || stream > 1) return -EINVAL; stream = array_index_nospec(stream, 2); if (get_user(subdevice, &info->subdevice)) return -EFAULT; guard(mutex)(®ister_mutex); pcm = snd_pcm_get(card, device); if (pcm == NULL) return -ENXIO; pstr = &pcm->streams[stream]; if (pstr->substream_count == 0) return -ENOENT; if (subdevice >= pstr->substream_count) return -ENXIO; for (substream = pstr->substream; substream; substream = substream->next) if (substream->number == (int)subdevice) break; if (substream == NULL) return -ENXIO; guard(mutex)(&pcm->open_mutex); return snd_pcm_info_user(substream, info); } case SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE: { int val; if (get_user(val, (int __user *)arg)) return -EFAULT; control->preferred_subdevice[SND_CTL_SUBDEV_PCM] = val; return 0; } } return -ENOIOCTLCMD; } #define FORMAT(v) [SNDRV_PCM_FORMAT_##v] = #v static const char * const snd_pcm_format_names[] = { FORMAT(S8), FORMAT(U8), FORMAT(S16_LE), FORMAT(S16_BE), FORMAT(U16_LE), FORMAT(U16_BE), FORMAT(S24_LE), FORMAT(S24_BE), FORMAT(U24_LE), FORMAT(U24_BE), FORMAT(S32_LE), FORMAT(S32_BE), FORMAT(U32_LE), FORMAT(U32_BE), FORMAT(FLOAT_LE), FORMAT(FLOAT_BE), FORMAT(FLOAT64_LE), FORMAT(FLOAT64_BE), FORMAT(IEC958_SUBFRAME_LE), FORMAT(IEC958_SUBFRAME_BE), FORMAT(MU_LAW), FORMAT(A_LAW), FORMAT(IMA_ADPCM), FORMAT(MPEG), FORMAT(GSM), FORMAT(SPECIAL), FORMAT(S24_3LE), FORMAT(S24_3BE), FORMAT(U24_3LE), FORMAT(U24_3BE), FORMAT(S20_3LE), FORMAT(S20_3BE), FORMAT(U20_3LE), FORMAT(U20_3BE), FORMAT(S18_3LE), FORMAT(S18_3BE), FORMAT(U18_3LE), FORMAT(U18_3BE), FORMAT(G723_24), FORMAT(G723_24_1B), FORMAT(G723_40), FORMAT(G723_40_1B), FORMAT(DSD_U8), FORMAT(DSD_U16_LE), FORMAT(DSD_U32_LE), FORMAT(DSD_U16_BE), FORMAT(DSD_U32_BE), FORMAT(S20_LE), FORMAT(S20_BE), FORMAT(U20_LE), FORMAT(U20_BE), }; /** * snd_pcm_format_name - Return a name string for the given PCM format * @format: PCM format * * Return: the format name string */ const char *snd_pcm_format_name(snd_pcm_format_t format) { unsigned int format_num = (__force unsigned int)format; if (format_num >= ARRAY_SIZE(snd_pcm_format_names) || !snd_pcm_format_names[format_num]) return "Unknown"; return snd_pcm_format_names[format_num]; } EXPORT_SYMBOL_GPL(snd_pcm_format_name); #ifdef CONFIG_SND_VERBOSE_PROCFS #define STATE(v) [SNDRV_PCM_STATE_##v] = #v #define STREAM(v) [SNDRV_PCM_STREAM_##v] = #v #define READY(v) [SNDRV_PCM_READY_##v] = #v #define XRUN(v) [SNDRV_PCM_XRUN_##v] = #v #define SILENCE(v) [SNDRV_PCM_SILENCE_##v] = #v #define TSTAMP(v) [SNDRV_PCM_TSTAMP_##v] = #v #define ACCESS(v) [SNDRV_PCM_ACCESS_##v] = #v #define START(v) [SNDRV_PCM_START_##v] = #v #define SUBFORMAT(v) 
[SNDRV_PCM_SUBFORMAT_##v] = #v static const char * const snd_pcm_stream_names[] = { STREAM(PLAYBACK), STREAM(CAPTURE), }; static const char * const snd_pcm_state_names[] = { STATE(OPEN), STATE(SETUP), STATE(PREPARED), STATE(RUNNING), STATE(XRUN), STATE(DRAINING), STATE(PAUSED), STATE(SUSPENDED), STATE(DISCONNECTED), }; static const char * const snd_pcm_access_names[] = { ACCESS(MMAP_INTERLEAVED), ACCESS(MMAP_NONINTERLEAVED), ACCESS(MMAP_COMPLEX), ACCESS(RW_INTERLEAVED), ACCESS(RW_NONINTERLEAVED), }; static const char * const snd_pcm_subformat_names[] = { SUBFORMAT(STD), SUBFORMAT(MSBITS_MAX), SUBFORMAT(MSBITS_20), SUBFORMAT(MSBITS_24), }; static const char * const snd_pcm_tstamp_mode_names[] = { TSTAMP(NONE), TSTAMP(ENABLE), }; static const char *snd_pcm_stream_name(int stream) { return snd_pcm_stream_names[stream]; } static const char *snd_pcm_access_name(snd_pcm_access_t access) { return snd_pcm_access_names[(__force int)access]; } static const char *snd_pcm_subformat_name(snd_pcm_subformat_t subformat) { return snd_pcm_subformat_names[(__force int)subformat]; } static const char *snd_pcm_tstamp_mode_name(int mode) { return snd_pcm_tstamp_mode_names[mode]; } static const char *snd_pcm_state_name(snd_pcm_state_t state) { return snd_pcm_state_names[(__force int)state]; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) #include <linux/soundcard.h> static const char *snd_pcm_oss_format_name(int format) { switch (format) { case AFMT_MU_LAW: return "MU_LAW"; case AFMT_A_LAW: return "A_LAW"; case AFMT_IMA_ADPCM: return "IMA_ADPCM"; case AFMT_U8: return "U8"; case AFMT_S16_LE: return "S16_LE"; case AFMT_S16_BE: return "S16_BE"; case AFMT_S8: return "S8"; case AFMT_U16_LE: return "U16_LE"; case AFMT_U16_BE: return "U16_BE"; case AFMT_MPEG: return "MPEG"; default: return "unknown"; } } #endif static void snd_pcm_proc_info_read(struct snd_pcm_substream *substream, struct snd_info_buffer *buffer) { struct snd_pcm_info *info __free(kfree) = NULL; int err; if (! 
substream) return; info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) return; err = snd_pcm_info(substream, info); if (err < 0) { snd_iprintf(buffer, "error %d\n", err); return; } snd_iprintf(buffer, "card: %d\n", info->card); snd_iprintf(buffer, "device: %d\n", info->device); snd_iprintf(buffer, "subdevice: %d\n", info->subdevice); snd_iprintf(buffer, "stream: %s\n", snd_pcm_stream_name(info->stream)); snd_iprintf(buffer, "id: %s\n", info->id); snd_iprintf(buffer, "name: %s\n", info->name); snd_iprintf(buffer, "subname: %s\n", info->subname); snd_iprintf(buffer, "class: %d\n", info->dev_class); snd_iprintf(buffer, "subclass: %d\n", info->dev_subclass); snd_iprintf(buffer, "subdevices_count: %d\n", info->subdevices_count); snd_iprintf(buffer, "subdevices_avail: %d\n", info->subdevices_avail); } static void snd_pcm_stream_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_pcm_proc_info_read(((struct snd_pcm_str *)entry->private_data)->substream, buffer); } static void snd_pcm_substream_proc_info_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { snd_pcm_proc_info_read(entry->private_data, buffer); } static void snd_pcm_substream_proc_hw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } if (runtime->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); return; } snd_iprintf(buffer, "access: %s\n", snd_pcm_access_name(runtime->access)); snd_iprintf(buffer, "format: %s\n", snd_pcm_format_name(runtime->format)); snd_iprintf(buffer, "subformat: %s\n", snd_pcm_subformat_name(runtime->subformat)); snd_iprintf(buffer, "channels: %u\n", runtime->channels); snd_iprintf(buffer, "rate: %u (%u/%u)\n", runtime->rate, runtime->rate_num, runtime->rate_den); snd_iprintf(buffer, "period_size: %lu\n", runtime->period_size); snd_iprintf(buffer, "buffer_size: %lu\n", runtime->buffer_size); #if IS_ENABLED(CONFIG_SND_PCM_OSS) if (substream->oss.oss) { snd_iprintf(buffer, "OSS format: %s\n", snd_pcm_oss_format_name(runtime->oss.format)); snd_iprintf(buffer, "OSS channels: %u\n", runtime->oss.channels); snd_iprintf(buffer, "OSS rate: %u\n", runtime->oss.rate); snd_iprintf(buffer, "OSS period bytes: %lu\n", (unsigned long)runtime->oss.period_bytes); snd_iprintf(buffer, "OSS periods: %u\n", runtime->oss.periods); snd_iprintf(buffer, "OSS period frames: %lu\n", (unsigned long)runtime->oss.period_frames); } #endif } static void snd_pcm_substream_proc_sw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } if (runtime->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); return; } snd_iprintf(buffer, "tstamp_mode: %s\n", snd_pcm_tstamp_mode_name(runtime->tstamp_mode)); snd_iprintf(buffer, "period_step: %u\n", runtime->period_step); snd_iprintf(buffer, "avail_min: %lu\n", runtime->control->avail_min); snd_iprintf(buffer, "start_threshold: %lu\n", runtime->start_threshold); snd_iprintf(buffer, "stop_threshold: %lu\n", runtime->stop_threshold); snd_iprintf(buffer, "silence_threshold: %lu\n", runtime->silence_threshold); snd_iprintf(buffer, "silence_size: 
%lu\n", runtime->silence_size); snd_iprintf(buffer, "boundary: %lu\n", runtime->boundary); } static void snd_pcm_substream_proc_status_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; struct snd_pcm_runtime *runtime; struct snd_pcm_status64 status; int err; guard(mutex)(&substream->pcm->open_mutex); runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); return; } memset(&status, 0, sizeof(status)); err = snd_pcm_status64(substream, &status); if (err < 0) { snd_iprintf(buffer, "error %d\n", err); return; } snd_iprintf(buffer, "state: %s\n", snd_pcm_state_name(status.state)); snd_iprintf(buffer, "owner_pid : %d\n", pid_vnr(substream->pid)); snd_iprintf(buffer, "trigger_time: %lld.%09lld\n", status.trigger_tstamp_sec, status.trigger_tstamp_nsec); snd_iprintf(buffer, "tstamp : %lld.%09lld\n", status.tstamp_sec, status.tstamp_nsec); snd_iprintf(buffer, "delay : %ld\n", status.delay); snd_iprintf(buffer, "avail : %ld\n", status.avail); snd_iprintf(buffer, "avail_max : %ld\n", status.avail_max); snd_iprintf(buffer, "-----\n"); snd_iprintf(buffer, "hw_ptr : %ld\n", runtime->status->hw_ptr); snd_iprintf(buffer, "appl_ptr : %ld\n", runtime->control->appl_ptr); #ifdef CONFIG_SND_PCM_XRUN_DEBUG snd_iprintf(buffer, "xrun_counter: %d\n", substream->xrun_counter); #endif } #ifdef CONFIG_SND_PCM_XRUN_DEBUG static void snd_pcm_xrun_injection_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; snd_pcm_stop_xrun(substream); } static void snd_pcm_xrun_debug_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; snd_iprintf(buffer, "%d\n", pstr->xrun_debug); } static void snd_pcm_xrun_debug_write(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_str *pstr = entry->private_data; char line[64]; if (!snd_info_get_line(buffer, line, sizeof(line))) pstr->xrun_debug = simple_strtoul(line, NULL, 10); } #endif static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) { struct snd_pcm *pcm = pstr->pcm; struct snd_info_entry *entry; char name[16]; sprintf(name, "pcm%i%c", pcm->device, pstr->stream == SNDRV_PCM_STREAM_PLAYBACK ? 
'p' : 'c'); entry = snd_info_create_card_entry(pcm->card, name, pcm->card->proc_root); if (!entry) return -ENOMEM; entry->mode = S_IFDIR | 0555; pstr->proc_root = entry; entry = snd_info_create_card_entry(pcm->card, "info", pstr->proc_root); if (entry) snd_info_set_text_ops(entry, pstr, snd_pcm_stream_proc_info_read); #ifdef CONFIG_SND_PCM_XRUN_DEBUG entry = snd_info_create_card_entry(pcm->card, "xrun_debug", pstr->proc_root); if (entry) { snd_info_set_text_ops(entry, pstr, snd_pcm_xrun_debug_read); entry->c.text.write = snd_pcm_xrun_debug_write; entry->mode |= 0200; } #endif return 0; } static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { snd_info_free_entry(pstr->proc_root); pstr->proc_root = NULL; return 0; } static struct snd_info_entry * create_substream_info_entry(struct snd_pcm_substream *substream, const char *name, void (*read)(struct snd_info_entry *, struct snd_info_buffer *)) { struct snd_info_entry *entry; entry = snd_info_create_card_entry(substream->pcm->card, name, substream->proc_root); if (entry) snd_info_set_text_ops(entry, substream, read); return entry; } static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) { struct snd_info_entry *entry; struct snd_card *card; char name[16]; card = substream->pcm->card; sprintf(name, "sub%i", substream->number); entry = snd_info_create_card_entry(card, name, substream->pstr->proc_root); if (!entry) return -ENOMEM; entry->mode = S_IFDIR | 0555; substream->proc_root = entry; create_substream_info_entry(substream, "info", snd_pcm_substream_proc_info_read); create_substream_info_entry(substream, "hw_params", snd_pcm_substream_proc_hw_params_read); create_substream_info_entry(substream, "sw_params", snd_pcm_substream_proc_sw_params_read); create_substream_info_entry(substream, "status", snd_pcm_substream_proc_status_read); #ifdef CONFIG_SND_PCM_XRUN_DEBUG entry = create_substream_info_entry(substream, "xrun_injection", NULL); if (entry) { entry->c.text.write = snd_pcm_xrun_injection_write; entry->mode = S_IFREG | 0200; } #endif /* CONFIG_SND_PCM_XRUN_DEBUG */ return 0; } #else /* !CONFIG_SND_VERBOSE_PROCFS */ static inline int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) { return 0; } static inline int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { return 0; } static inline int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) { return 0; } #endif /* CONFIG_SND_VERBOSE_PROCFS */ static const struct attribute_group *pcm_dev_attr_groups[]; /* * PM callbacks: we need to deal only with suspend here, as the resume is * triggered either from user-space or the driver's resume callback */ #ifdef CONFIG_PM_SLEEP static int do_pcm_suspend(struct device *dev) { struct snd_pcm_str *pstr = dev_get_drvdata(dev); if (!pstr->pcm->no_device_suspend) snd_pcm_suspend_all(pstr->pcm); return 0; } #endif static const struct dev_pm_ops pcm_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(do_pcm_suspend, NULL) }; /* device type for PCM -- basically only for passing PM callbacks */ static const struct device_type pcm_dev_type = { .name = "pcm", .pm = &pcm_dev_pm_ops, }; /** * snd_pcm_new_stream - create a new PCM stream * @pcm: the pcm instance * @stream: the stream direction, SNDRV_PCM_STREAM_XXX * @substream_count: the number of substreams * * Creates a new stream for the pcm. * The corresponding stream on the pcm must have been empty before * calling this, i.e. zero must be given to the argument of * snd_pcm_new(). * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count) { int idx, err; struct snd_pcm_str *pstr = &pcm->streams[stream]; struct snd_pcm_substream *substream, *prev; #if IS_ENABLED(CONFIG_SND_PCM_OSS) mutex_init(&pstr->oss.setup_mutex); #endif pstr->stream = stream; pstr->pcm = pcm; pstr->substream_count = substream_count; if (!substream_count) return 0; err = snd_device_alloc(&pstr->dev, pcm->card); if (err < 0) return err; dev_set_name(pstr->dev, "pcmC%iD%i%c", pcm->card->number, pcm->device, stream == SNDRV_PCM_STREAM_PLAYBACK ? 'p' : 'c'); pstr->dev->groups = pcm_dev_attr_groups; pstr->dev->type = &pcm_dev_type; dev_set_drvdata(pstr->dev, pstr); if (!pcm->internal) { err = snd_pcm_stream_proc_init(pstr); if (err < 0) { pcm_err(pcm, "Error in snd_pcm_stream_proc_init\n"); return err; } } prev = NULL; for (idx = 0, prev = NULL; idx < substream_count; idx++) { substream = kzalloc(sizeof(*substream), GFP_KERNEL); if (!substream) return -ENOMEM; substream->pcm = pcm; substream->pstr = pstr; substream->number = idx; substream->stream = stream; sprintf(substream->name, "subdevice #%i", idx); substream->buffer_bytes_max = UINT_MAX; if (prev == NULL) pstr->substream = substream; else prev->next = substream; if (!pcm->internal) { err = snd_pcm_substream_proc_init(substream); if (err < 0) { pcm_err(pcm, "Error in snd_pcm_stream_proc_init\n"); if (prev == NULL) pstr->substream = NULL; else prev->next = NULL; kfree(substream); return err; } } substream->group = &substream->self_group; snd_pcm_group_init(&substream->self_group); list_add_tail(&substream->link_list, &substream->self_group.substreams); atomic_set(&substream->mmap_count, 0); prev = substream; } return 0; } EXPORT_SYMBOL(snd_pcm_new_stream); static int _snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, bool internal, struct snd_pcm **rpcm) { struct snd_pcm *pcm; int err; static const struct snd_device_ops ops = { .dev_free = snd_pcm_dev_free, .dev_register = snd_pcm_dev_register, .dev_disconnect = snd_pcm_dev_disconnect, }; static const struct snd_device_ops internal_ops = { .dev_free = snd_pcm_dev_free, }; if (snd_BUG_ON(!card)) return -ENXIO; if (rpcm) *rpcm = NULL; pcm = kzalloc(sizeof(*pcm), GFP_KERNEL); if (!pcm) return -ENOMEM; pcm->card = card; pcm->device = device; pcm->internal = internal; mutex_init(&pcm->open_mutex); init_waitqueue_head(&pcm->open_wait); INIT_LIST_HEAD(&pcm->list); if (id) strscpy(pcm->id, id, sizeof(pcm->id)); err = snd_pcm_new_stream(pcm, SNDRV_PCM_STREAM_PLAYBACK, playback_count); if (err < 0) goto free_pcm; err = snd_pcm_new_stream(pcm, SNDRV_PCM_STREAM_CAPTURE, capture_count); if (err < 0) goto free_pcm; err = snd_device_new(card, SNDRV_DEV_PCM, pcm, internal ? &internal_ops : &ops); if (err < 0) goto free_pcm; if (rpcm) *rpcm = pcm; return 0; free_pcm: snd_pcm_free(pcm); return err; } /** * snd_pcm_new - create a new PCM instance * @card: the card instance * @id: the id string * @device: the device index (zero based) * @playback_count: the number of substreams for playback * @capture_count: the number of substreams for capture * @rpcm: the pointer to store the new pcm instance * * Creates a new PCM instance. * * The pcm operators have to be set afterwards to the new instance * via snd_pcm_set_ops(). * * Return: Zero if successful, or a negative error code on failure. 
*/ int snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm) { return _snd_pcm_new(card, id, device, playback_count, capture_count, false, rpcm); } EXPORT_SYMBOL(snd_pcm_new); /** * snd_pcm_new_internal - create a new internal PCM instance * @card: the card instance * @id: the id string * @device: the device index (zero based - shared with normal PCMs) * @playback_count: the number of substreams for playback * @capture_count: the number of substreams for capture * @rpcm: the pointer to store the new pcm instance * * Creates a new internal PCM instance with no userspace device or procfs * entries. This is used by ASoC Back End PCMs in order to create a PCM that * will only be used internally by kernel drivers. i.e. it cannot be opened * by userspace. It provides existing ASoC components drivers with a substream * and access to any private data. * * The pcm operators have to be set afterwards to the new instance * via snd_pcm_set_ops(). * * Return: Zero if successful, or a negative error code on failure. */ int snd_pcm_new_internal(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm) { return _snd_pcm_new(card, id, device, playback_count, capture_count, true, rpcm); } EXPORT_SYMBOL(snd_pcm_new_internal); static void free_chmap(struct snd_pcm_str *pstr) { if (pstr->chmap_kctl) { struct snd_card *card = pstr->pcm->card; snd_ctl_remove(card, pstr->chmap_kctl); pstr->chmap_kctl = NULL; } } static void snd_pcm_free_stream(struct snd_pcm_str * pstr) { struct snd_pcm_substream *substream, *substream_next; #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_oss_setup *setup, *setupn; #endif /* free all proc files under the stream */ snd_pcm_stream_proc_done(pstr); substream = pstr->substream; while (substream) { substream_next = substream->next; snd_pcm_timer_done(substream); kfree(substream); substream = substream_next; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) for (setup = pstr->oss.setup_list; setup; setup = setupn) { setupn = setup->next; kfree(setup->task_name); kfree(setup); } #endif free_chmap(pstr); if (pstr->substream_count) put_device(pstr->dev); } #if IS_ENABLED(CONFIG_SND_PCM_OSS) #define pcm_call_notify(pcm, call) \ do { \ struct snd_pcm_notify *_notify; \ list_for_each_entry(_notify, &snd_pcm_notify_list, list) \ _notify->call(pcm); \ } while (0) #else #define pcm_call_notify(pcm, call) do {} while (0) #endif static int snd_pcm_free(struct snd_pcm *pcm) { if (!pcm) return 0; if (!pcm->internal) pcm_call_notify(pcm, n_unregister); if (pcm->private_free) pcm->private_free(pcm); snd_pcm_lib_preallocate_free_for_all(pcm); snd_pcm_free_stream(&pcm->streams[SNDRV_PCM_STREAM_PLAYBACK]); snd_pcm_free_stream(&pcm->streams[SNDRV_PCM_STREAM_CAPTURE]); kfree(pcm); return 0; } static int snd_pcm_dev_free(struct snd_device *device) { struct snd_pcm *pcm = device->device_data; return snd_pcm_free(pcm); } int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream) { struct snd_pcm_str * pstr; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; struct snd_card *card; int prefer_subdevice; size_t size; if (snd_BUG_ON(!pcm || !rsubstream)) return -ENXIO; if (snd_BUG_ON(stream != SNDRV_PCM_STREAM_PLAYBACK && stream != SNDRV_PCM_STREAM_CAPTURE)) return -EINVAL; *rsubstream = NULL; pstr = &pcm->streams[stream]; if (pstr->substream == NULL || pstr->substream_count == 0) return -ENODEV; card = pcm->card; 
prefer_subdevice = snd_ctl_get_preferred_subdevice(card, SND_CTL_SUBDEV_PCM); if (pcm->info_flags & SNDRV_PCM_INFO_HALF_DUPLEX) { int opposite = !stream; for (substream = pcm->streams[opposite].substream; substream; substream = substream->next) { if (SUBSTREAM_BUSY(substream)) return -EAGAIN; } } if (file->f_flags & O_APPEND) { if (prefer_subdevice < 0) { if (pstr->substream_count > 1) return -EINVAL; /* must be unique */ substream = pstr->substream; } else { for (substream = pstr->substream; substream; substream = substream->next) if (substream->number == prefer_subdevice) break; } if (! substream) return -ENODEV; if (! SUBSTREAM_BUSY(substream)) return -EBADFD; substream->ref_count++; *rsubstream = substream; return 0; } for (substream = pstr->substream; substream; substream = substream->next) { if (!SUBSTREAM_BUSY(substream) && (prefer_subdevice == -1 || substream->number == prefer_subdevice)) break; } if (substream == NULL) return -EAGAIN; runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); if (runtime == NULL) return -ENOMEM; size = PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status)); runtime->status = alloc_pages_exact(size, GFP_KERNEL); if (runtime->status == NULL) { kfree(runtime); return -ENOMEM; } memset(runtime->status, 0, size); size = PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control)); runtime->control = alloc_pages_exact(size, GFP_KERNEL); if (runtime->control == NULL) { free_pages_exact(runtime->status, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))); kfree(runtime); return -ENOMEM; } memset(runtime->control, 0, size); init_waitqueue_head(&runtime->sleep); init_waitqueue_head(&runtime->tsleep); __snd_pcm_set_state(runtime, SNDRV_PCM_STATE_OPEN); mutex_init(&runtime->buffer_mutex); atomic_set(&runtime->buffer_accessing, 0); substream->runtime = runtime; substream->private_data = pcm->private_data; substream->ref_count = 1; substream->f_flags = file->f_flags; substream->pid = get_pid(task_pid(current)); pstr->substream_opened++; *rsubstream = substream; #ifdef CONFIG_SND_PCM_XRUN_DEBUG substream->xrun_counter = 0; #endif /* CONFIG_SND_PCM_XRUN_DEBUG */ return 0; } void snd_pcm_detach_substream(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; if (PCM_RUNTIME_CHECK(substream)) return; runtime = substream->runtime; if (runtime->private_free != NULL) runtime->private_free(runtime); free_pages_exact(runtime->status, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_status))); free_pages_exact(runtime->control, PAGE_ALIGN(sizeof(struct snd_pcm_mmap_control))); kfree(runtime->hw_constraints.rules); /* Avoid concurrent access to runtime via PCM timer interface */ if (substream->timer) { scoped_guard(spinlock_irq, &substream->timer->lock) substream->runtime = NULL; } else { substream->runtime = NULL; } mutex_destroy(&runtime->buffer_mutex); snd_fasync_free(runtime->fasync); kfree(runtime); put_pid(substream->pid); substream->pid = NULL; substream->pstr->substream_opened--; } static ssize_t pcm_class_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_pcm_str *pstr = dev_get_drvdata(dev); struct snd_pcm *pcm = pstr->pcm; const char *str; static const char *strs[SNDRV_PCM_CLASS_LAST + 1] = { [SNDRV_PCM_CLASS_GENERIC] = "generic", [SNDRV_PCM_CLASS_MULTI] = "multi", [SNDRV_PCM_CLASS_MODEM] = "modem", [SNDRV_PCM_CLASS_DIGITIZER] = "digitizer", }; if (pcm->dev_class > SNDRV_PCM_CLASS_LAST) str = "none"; else str = strs[pcm->dev_class]; return sysfs_emit(buf, "%s\n", str); } static DEVICE_ATTR_RO(pcm_class); static struct attribute *pcm_dev_attrs[] = { 
&dev_attr_pcm_class.attr, NULL }; static const struct attribute_group pcm_dev_attr_group = { .attrs = pcm_dev_attrs, }; static const struct attribute_group *pcm_dev_attr_groups[] = { &pcm_dev_attr_group, NULL }; static int snd_pcm_dev_register(struct snd_device *device) { int cidx, err; struct snd_pcm_substream *substream; struct snd_pcm *pcm; if (snd_BUG_ON(!device || !device->device_data)) return -ENXIO; pcm = device->device_data; guard(mutex)(®ister_mutex); err = snd_pcm_add(pcm); if (err) return err; for (cidx = 0; cidx < 2; cidx++) { int devtype = -1; if (pcm->streams[cidx].substream == NULL) continue; switch (cidx) { case SNDRV_PCM_STREAM_PLAYBACK: devtype = SNDRV_DEVICE_TYPE_PCM_PLAYBACK; break; case SNDRV_PCM_STREAM_CAPTURE: devtype = SNDRV_DEVICE_TYPE_PCM_CAPTURE; break; } /* register pcm */ err = snd_register_device(devtype, pcm->card, pcm->device, &snd_pcm_f_ops[cidx], pcm, pcm->streams[cidx].dev); if (err < 0) { list_del_init(&pcm->list); return err; } for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) snd_pcm_timer_init(substream); } pcm_call_notify(pcm, n_register); return err; } static int snd_pcm_dev_disconnect(struct snd_device *device) { struct snd_pcm *pcm = device->device_data; struct snd_pcm_substream *substream; int cidx; guard(mutex)(®ister_mutex); guard(mutex)(&pcm->open_mutex); wake_up(&pcm->open_wait); list_del_init(&pcm->list); for_each_pcm_substream(pcm, cidx, substream) { snd_pcm_stream_lock_irq(substream); if (substream->runtime) { if (snd_pcm_running(substream)) snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); /* to be sure, set the state unconditionally */ __snd_pcm_set_state(substream->runtime, SNDRV_PCM_STATE_DISCONNECTED); wake_up(&substream->runtime->sleep); wake_up(&substream->runtime->tsleep); } snd_pcm_stream_unlock_irq(substream); } for_each_pcm_substream(pcm, cidx, substream) snd_pcm_sync_stop(substream, false); pcm_call_notify(pcm, n_disconnect); for (cidx = 0; cidx < 2; cidx++) { if (pcm->streams[cidx].dev) snd_unregister_device(pcm->streams[cidx].dev); free_chmap(&pcm->streams[cidx]); } return 0; } #if IS_ENABLED(CONFIG_SND_PCM_OSS) /** * snd_pcm_notify - Add/remove the notify list * @notify: PCM notify list * @nfree: 0 = register, 1 = unregister * * This adds the given notifier to the global list so that the callback is * called for each registered PCM devices. This exists only for PCM OSS * emulation, so far. 
* * Return: zero if successful, or a negative error code */ int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree) { struct snd_pcm *pcm; if (snd_BUG_ON(!notify || !notify->n_register || !notify->n_unregister || !notify->n_disconnect)) return -EINVAL; guard(mutex)(®ister_mutex); if (nfree) { list_del(¬ify->list); list_for_each_entry(pcm, &snd_pcm_devices, list) notify->n_unregister(pcm); } else { list_add_tail(¬ify->list, &snd_pcm_notify_list); list_for_each_entry(pcm, &snd_pcm_devices, list) notify->n_register(pcm); } return 0; } EXPORT_SYMBOL(snd_pcm_notify); #endif /* CONFIG_SND_PCM_OSS */ #ifdef CONFIG_SND_PROC_FS /* * Info interface */ static void snd_pcm_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm *pcm; guard(mutex)(®ister_mutex); list_for_each_entry(pcm, &snd_pcm_devices, list) { snd_iprintf(buffer, "%02i-%02i: %s : %s", pcm->card->number, pcm->device, pcm->id, pcm->name); if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) snd_iprintf(buffer, " : playback %i", pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream_count); if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) snd_iprintf(buffer, " : capture %i", pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream_count); snd_iprintf(buffer, "\n"); } } static struct snd_info_entry *snd_pcm_proc_entry; static void snd_pcm_proc_init(void) { struct snd_info_entry *entry; entry = snd_info_create_module_entry(THIS_MODULE, "pcm", NULL); if (entry) { snd_info_set_text_ops(entry, NULL, snd_pcm_proc_read); if (snd_info_register(entry) < 0) { snd_info_free_entry(entry); entry = NULL; } } snd_pcm_proc_entry = entry; } static void snd_pcm_proc_done(void) { snd_info_free_entry(snd_pcm_proc_entry); } #else /* !CONFIG_SND_PROC_FS */ #define snd_pcm_proc_init() #define snd_pcm_proc_done() #endif /* CONFIG_SND_PROC_FS */ /* * ENTRY functions */ static int __init alsa_pcm_init(void) { snd_ctl_register_ioctl(snd_pcm_control_ioctl); snd_ctl_register_ioctl_compat(snd_pcm_control_ioctl); snd_pcm_proc_init(); return 0; } static void __exit alsa_pcm_exit(void) { snd_ctl_unregister_ioctl(snd_pcm_control_ioctl); snd_ctl_unregister_ioctl_compat(snd_pcm_control_ioctl); snd_pcm_proc_done(); } module_init(alsa_pcm_init) module_exit(alsa_pcm_exit) |
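/*
 * Usage sketch (not part of this file): the kernel-doc for snd_pcm_new()
 * above notes that the PCM operators have to be attached afterwards with
 * snd_pcm_set_ops().  A card driver typically does roughly the following in
 * its probe path.  "mychip", the struct layout and the empty ops table are
 * hypothetical placeholders; only snd_pcm_new(), snd_pcm_set_ops() and the
 * pcm fields used here come from the ALSA core.
 */
#include <linux/string.h>
#include <sound/core.h>
#include <sound/pcm.h>

struct mychip {
	struct snd_card *card;
	struct snd_pcm *pcm;
};

static const struct snd_pcm_ops mychip_pcm_ops = {
	/* real .open, .close, .hw_params, .hw_free, .prepare, .trigger and
	 * .pointer callbacks for the hardware would be filled in here */
};

static int mychip_new_pcm(struct mychip *chip)
{
	struct snd_pcm *pcm;
	int err;

	/* device index 0, one playback and one capture substream */
	err = snd_pcm_new(chip->card, "MyChip", 0, 1, 1, &pcm);
	if (err < 0)
		return err;

	pcm->private_data = chip;
	strscpy(pcm->name, "MyChip PCM", sizeof(pcm->name));

	/* operators are set after snd_pcm_new(), as documented above */
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &mychip_pcm_ops);
	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &mychip_pcm_ops);

	chip->pcm = pcm;
	return 0;
}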
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Neil Brown 2002 * Copyright (C) Christoph Hellwig 2007 * * This file contains the code mapping from inodes to NFS file handles, * and for mapping back from file handles to dentries. * * For details on why we do all the strange and hairy things in here * take a look at Documentation/filesystems/nfs/exporting.rst. */ #include <linux/exportfs.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/cred.h> #define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, char *name, struct dentry *child) { const struct export_operations *nop = dir->d_sb->s_export_op; struct path path = {.mnt = mnt, .dentry = dir}; if (nop->get_name) return nop->get_name(dir, name, child); else return get_name(&path, name, child); } /* * Check if the dentry or any of its aliases is acceptable. 
*/ static struct dentry * find_acceptable_alias(struct dentry *result, int (*acceptable)(void *context, struct dentry *dentry), void *context) { struct dentry *dentry, *toput = NULL; struct inode *inode; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } spin_lock(&inode->i_lock); toput = dentry; } spin_unlock(&inode->i_lock); if (toput) dput(toput); return NULL; } static bool dentry_connected(struct dentry *dentry) { dget(dentry); while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); dput(dentry); if (dentry == parent) { dput(parent); return false; } dentry = parent; } dput(dentry); return true; } static void clear_disconnected(struct dentry *dentry) { dget(dentry); while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); WARN_ON_ONCE(IS_ROOT(dentry)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_DISCONNECTED; spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } dput(dentry); } /* * Reconnect a directory dentry with its parent. * * This can return a dentry, or NULL, or an error. * * In the first case the returned dentry is the parent of the given * dentry, and may itself need to be reconnected to its parent. * * In the NULL case, a concurrent VFS operation has either renamed or * removed this directory. The concurrent operation has reconnected our * dentry, so we no longer need to. */ static struct dentry *reconnect_one(struct vfsmount *mnt, struct dentry *dentry, char *nbuf) { struct dentry *parent; struct dentry *tmp; int err; parent = ERR_PTR(-EACCES); inode_lock(dentry->d_inode); if (mnt->mnt_sb->s_export_op->get_parent) parent = mnt->mnt_sb->s_export_op->get_parent(dentry); inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { dprintk("get_parent of %lu failed, err %ld\n", dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } dprintk("%s: find name of %lu in %lu\n", __func__, dentry->d_inode->i_ino, parent->d_inode->i_ino); err = exportfs_get_name(mnt, parent, nbuf, dentry); if (err == -ENOENT) goto out_reconnected; if (err) goto out_err; dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_idmap(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } if (tmp != dentry) { /* * Somebody has renamed it since exportfs_get_name(); * great, since it could've only been renamed if it * got looked up and thus connected, and it would * remain connected afterwards. We are done. */ dput(tmp); goto out_reconnected; } dput(tmp); if (IS_ROOT(dentry)) { err = -ESTALE; goto out_err; } return parent; out_err: dput(parent); return ERR_PTR(err); out_reconnected: dput(parent); /* * Someone must have renamed our entry into another parent, in * which case it has been reconnected by the rename. * * Or someone removed it entirely, in which case filehandle * lookup will succeed but the directory is now IS_DEAD and * subsequent operations on it will fail. * * Alternatively, maybe there was no race at all, and the * filesystem is just corrupt and gave us a parent that doesn't * actually contain any entry pointing to this inode. 
So, * double check that this worked and return -ESTALE if not: */ if (!dentry_connected(dentry)) return ERR_PTR(-ESTALE); return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * * On successful return, DCACHE_DISCONNECTED will be cleared on * target_dir, and target_dir->d_parent->...->d_parent will reach the * root of the filesystem. * * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. * But the converse is not true: target_dir may have DCACHE_DISCONNECTED * set but already be connected. In that case we'll verify the * connection to root and then clear the flag. * * Note that target_dir could be removed by a concurrent operation. In * that case reconnect_path may still succeed with target_dir fully * connected, but further operations using the filehandle will fail when * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { struct dentry *dentry, *parent; dentry = dget(target_dir); while (dentry->d_flags & DCACHE_DISCONNECTED) { BUG_ON(dentry == mnt->mnt_sb->s_root); if (IS_ROOT(dentry)) parent = reconnect_one(mnt, dentry, nbuf); else parent = dget_parent(dentry); if (!parent) break; dput(dentry); if (IS_ERR(parent)) return PTR_ERR(parent); dentry = parent; } dput(dentry); clear_disconnected(target_dir); return 0; } struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; /* * A rather strange filldir function to capture * the name matching the specified inode number. */ static bool filldir_one(struct dir_context *ctx, const char *name, int len, loff_t pos, u64 ino, unsigned int d_type) { struct getdents_callback *buf = container_of(ctx, struct getdents_callback, ctx); buf->sequence++; if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; return false; // no more } return true; } /** * get_name - default export_operations->get_name function * @path: the directory in which to find a name * @name: a pointer to a %NAME_MAX+1 char buffer to store the name * @child: the dentry for the child directory. * * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ static int get_name(const struct path *path, char *name, struct dentry *child) { const struct cred *cred = current_cred(); struct inode *dir = path->dentry->d_inode; int error; struct file *file; struct kstat stat; struct path child_path = { .mnt = path->mnt, .dentry = child, }; struct getdents_callback buffer = { .ctx.actor = filldir_one, .name = name, }; error = -ENOTDIR; if (!dir || !S_ISDIR(dir->i_mode)) goto out; error = -EINVAL; if (!dir->i_fop) goto out; /* * inode->i_ino is unsigned long, kstat->ino is u64, so the * former would be insufficient on 32-bit hosts when the * filesystem supports 64-bit inode numbers. So we need to * actually call ->getattr, not just read i_ino: */ error = vfs_getattr_nosec(&child_path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; buffer.ino = stat.ino; /* * Open the directory ... 
*/ file = dentry_open(path, O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; error = -EINVAL; if (!file->f_op->iterate_shared) goto out_close; buffer.sequence = 0; while (1) { int old_seq = buffer.sequence; error = iterate_dir(file, &buffer.ctx); if (buffer.found) { error = 0; break; } if (error < 0) break; error = -ENOENT; if (old_seq == buffer.sequence) break; } out_close: fput(file); out: return error; } #define FILEID_INO64_GEN_LEN 3 /** * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id * @inode: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there (in 4 byte units) * * This generic function is used to encode a non-decodeable file id for * fanotify for filesystems that do not support NFS export. */ static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, int *max_len) { if (*max_len < FILEID_INO64_GEN_LEN) { *max_len = FILEID_INO64_GEN_LEN; return FILEID_INVALID; } fid->i64.ino = inode->i_ino; fid->i64.gen = inode->i_generation; *max_len = FILEID_INO64_GEN_LEN; return FILEID_INO64_GEN; } /** * exportfs_encode_inode_fh - encode a file handle from inode * @inode: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there * @parent: parent directory inode, if wanted * @flags: properties of the requested file handle * * Returns an enum fid_type or a negative errno. */ int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags) { const struct export_operations *nop = inode->i_sb->s_export_op; enum fid_type type; if (!exportfs_can_encode_fh(nop, flags)) return -EOPNOTSUPP; if (!nop && (flags & EXPORT_FH_FID)) type = exportfs_encode_ino64_fid(inode, fid, max_len); else type = nop->encode_fh(inode, fid->raw, max_len, parent); if (type > 0 && FILEID_USER_FLAGS(type)) { pr_warn_once("%s: unexpected fh type value 0x%x from fstype %s.\n", __func__, type, inode->i_sb->s_type->name); return -EINVAL; } return type; } EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); /** * exportfs_encode_fh - encode a file handle from dentry * @dentry: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there * @flags: properties of the requested file handle * * Returns an enum fid_type or a negative errno. */ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags) { int error; struct dentry *p = NULL; struct inode *inode = dentry->d_inode, *parent = NULL; if ((flags & EXPORT_FH_CONNECTABLE) && !S_ISDIR(inode->i_mode)) { p = dget_parent(dentry); /* * note that while p might've ceased to be our parent already, * it's still pinned by and still positive. */ parent = p->d_inode; } error = exportfs_encode_inode_fh(inode, fid, max_len, parent, flags); dput(p); return error; } EXPORT_SYMBOL_GPL(exportfs_encode_fh); struct dentry * exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context) { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; char nbuf[NAME_MAX+1]; int err; if (fileid_type < 0 || FILEID_USER_FLAGS(fileid_type)) return ERR_PTR(-EINVAL); /* * Try to get any dentry for the given file handle from the filesystem. 
*/ if (!exportfs_can_decode_fh(nop)) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (IS_ERR_OR_NULL(result)) return result; if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) { err = -ENOTDIR; goto err_result; } /* * If no acceptance criteria was specified by caller, a disconnected * dentry is also accepatable. Callers may use this mode to query if * file handle is stale or to get a reference to an inode without * risking the high overhead caused by directory reconnect. */ if (!acceptable) return result; if (d_is_dir(result)) { /* * This request is for a directory. * * On the positive side there is only one dentry for each * directory inode. On the negative side this implies that we * to ensure our dentry is connected all the way up to the * filesystem root. */ if (result->d_flags & DCACHE_DISCONNECTED) { err = reconnect_path(mnt, result, nbuf); if (err) goto err_result; } if (!acceptable(context, result)) { err = -EACCES; goto err_result; } return result; } else { /* * It's not a directory. Life is a little more complicated. */ struct dentry *target_dir, *nresult; /* * See if either the dentry we just got from the filesystem * or any alias for it is acceptable. This is always true * if this filesystem is exported without the subtreecheck * option. If the filesystem is exported with the subtree * check option there's a fair chance we need to look at * the parent directory in the file handle and make sure * it's connected to the filesystem root. */ alias = find_acceptable_alias(result, acceptable, context); if (alias) return alias; /* * Try to extract a dentry for the parent directory from the * file handle. If this fails we'll have to give up. */ err = -ESTALE; if (!nop->fh_to_parent) goto err_result; target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, fh_len, fileid_type); if (!target_dir) goto err_result; err = PTR_ERR(target_dir); if (IS_ERR(target_dir)) goto err_result; /* * And as usual we need to make sure the parent directory is * connected to the filesystem root. The VFS really doesn't * like disconnected directories.. */ err = reconnect_path(mnt, target_dir, nbuf); if (err) { dput(target_dir); goto err_result; } /* * Now that we've got both a well-connected parent and a * dentry for the inode we're after, make sure that our * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); if (err) { dput(target_dir); goto err_result; } inode_lock(target_dir->d_inode); nresult = lookup_one(mnt_idmap(mnt), nbuf, target_dir, strlen(nbuf)); if (!IS_ERR(nresult)) { if (unlikely(nresult->d_inode != result->d_inode)) { dput(nresult); nresult = ERR_PTR(-ESTALE); } } inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); if (IS_ERR(nresult)) { err = PTR_ERR(nresult); goto err_result; } dput(result); result = nresult; /* * And finally make sure the dentry is actually acceptable * to NFSD. 
*/ alias = find_acceptable_alias(result, acceptable, context); if (!alias) { err = -EACCES; goto err_result; } return alias; } err_result: dput(result); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context) { struct dentry *ret; ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0, acceptable, context); if (IS_ERR_OR_NULL(ret)) { if (ret == ERR_PTR(-ENOMEM)) return ret; return ERR_PTR(-ESTALE); } return ret; } EXPORT_SYMBOL_GPL(exportfs_decode_fh); MODULE_LICENSE("GPL"); |
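As a usage sketch of the two exported helpers above (illustrative only, not part of expfs.c: the EXAMPLE_FH_WORDS buffer size and the accept-anything callback are assumptions made for this example), a kernel caller could round-trip a file handle roughly like this:

#define EXAMPLE_FH_WORDS 32	/* assumed upper bound, in 4-byte words */

static int example_accept_any(void *context, struct dentry *dentry)
{
	return 1;	/* accept whichever alias the decode path offers */
}

static struct dentry *example_fh_round_trip(struct vfsmount *mnt,
					    struct dentry *dentry)
{
	u32 buf[EXAMPLE_FH_WORDS];
	struct fid *fid = (struct fid *)buf;
	int max_len = EXAMPLE_FH_WORDS;		/* length is in 4-byte words */
	int type;

	type = exportfs_encode_fh(dentry, fid, &max_len,
				  EXPORT_FH_CONNECTABLE);
	if (type < 0)
		return ERR_PTR(type);
	if (type == FILEID_INVALID)
		return ERR_PTR(-EOVERFLOW);	/* buffer was too small */

	/*
	 * max_len now holds the number of words actually used.  On success
	 * the returned dentry carries a reference the caller must dput().
	 */
	return exportfs_decode_fh(mnt, fid, max_len, type,
				  example_accept_any, NULL);
}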
835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 | // SPDX-License-Identifier: GPL-2.0 /* * linux/mm/swap_state.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie * * Rewritten to use page cache, (C) 1998 Stephen Tweedie */ #include <linux/mm.h> #include <linux/gfp.h> #include <linux/kernel_stat.h> #include <linux/mempolicy.h> #include <linux/swap.h> #include <linux/swapops.h> #include <linux/init.h> #include <linux/pagemap.h> #include <linux/pagevec.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/migrate.h> #include <linux/vmalloc.h> #include <linux/swap_slots.h> #include <linux/huge_mm.h> #include <linux/shmem_fs.h> #include "internal.h" #include "swap.h" /* * swapper_space is a fiction, retained to simplify the path through * vmscan's shrink_folio_list. */ static const struct address_space_operations swap_aops = { .writepage = swap_writepage, .dirty_folio = noop_dirty_folio, #ifdef CONFIG_MIGRATION .migrate_folio = migrate_folio, #endif }; struct address_space *swapper_spaces[MAX_SWAPFILES] __read_mostly; static unsigned int nr_swapper_spaces[MAX_SWAPFILES] __read_mostly; static bool enable_vma_readahead __read_mostly = true; #define SWAP_RA_ORDER_CEILING 5 #define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2) #define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1) #define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK #define SWAP_RA_WIN_MASK (~PAGE_MASK & ~SWAP_RA_HITS_MASK) #define SWAP_RA_HITS(v) ((v) & SWAP_RA_HITS_MASK) #define SWAP_RA_WIN(v) (((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT) #define SWAP_RA_ADDR(v) ((v) & PAGE_MASK) #define SWAP_RA_VAL(addr, win, hits) \ (((addr) & PAGE_MASK) | \ (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) | \ ((hits) & SWAP_RA_HITS_MASK)) /* Initial readahead hits is 4 to start up with a small window */ #define GET_SWAP_RA_VAL(vma) \ (atomic_long_read(&(vma)->swap_readahead_info) ? : 4) static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); void show_swap_cache_info(void) { printk("%lu pages in swap cache\n", total_swapcache_pages()); printk("Free swap = %ldkB\n", K(get_nr_swap_pages())); printk("Total swap = %lukB\n", K(total_swap_pages)); } void *get_shadow_from_swap_cache(swp_entry_t entry) { struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swap_cache_index(entry); void *shadow; shadow = xa_load(&address_space->i_pages, idx); if (xa_is_value(shadow)) return shadow; return NULL; } /* * add_to_swap_cache resembles filemap_add_folio on swapper_space, * but sets SwapCache flag and private instead of mapping and index. 
*/ int add_to_swap_cache(struct folio *folio, swp_entry_t entry, gfp_t gfp, void **shadowp) { struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swap_cache_index(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, folio_order(folio)); unsigned long i, nr = folio_nr_pages(folio); void *old; xas_set_update(&xas, workingset_update_node); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio); VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); folio_ref_add(folio, nr); folio_set_swapcache(folio); folio->swap = entry; do { xas_lock_irq(&xas); xas_create_range(&xas); if (xas_error(&xas)) goto unlock; for (i = 0; i < nr; i++) { VM_BUG_ON_FOLIO(xas.xa_index != idx + i, folio); if (shadowp) { old = xas_load(&xas); if (xa_is_value(old)) *shadowp = old; } xas_store(&xas, folio); xas_next(&xas); } address_space->nrpages += nr; __node_stat_mod_folio(folio, NR_FILE_PAGES, nr); __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr); unlock: xas_unlock_irq(&xas); } while (xas_nomem(&xas, gfp)); if (!xas_error(&xas)) return 0; folio_clear_swapcache(folio); folio_ref_sub(folio, nr); return xas_error(&xas); } /* * This must be called only on folios that have * been verified to be in the swap cache. */ void __delete_from_swap_cache(struct folio *folio, swp_entry_t entry, void *shadow) { struct address_space *address_space = swap_address_space(entry); int i; long nr = folio_nr_pages(folio); pgoff_t idx = swap_cache_index(entry); XA_STATE(xas, &address_space->i_pages, idx); xas_set_update(&xas, workingset_update_node); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio); VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio); for (i = 0; i < nr; i++) { void *entry = xas_store(&xas, shadow); VM_BUG_ON_PAGE(entry != folio, entry); xas_next(&xas); } folio->swap.val = 0; folio_clear_swapcache(folio); address_space->nrpages -= nr; __node_stat_mod_folio(folio, NR_FILE_PAGES, -nr); __lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr); } /** * add_to_swap - allocate swap space for a folio * @folio: folio we want to move to swap * * Allocate swap space for the folio and add the folio to the * swap cache. * * Context: Caller needs to hold the folio lock. * Return: Whether the folio was added to the swap cache. */ bool add_to_swap(struct folio *folio) { swp_entry_t entry; int err; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio); entry = folio_alloc_swap(folio); if (!entry.val) return false; /* * XArray node allocations from PF_MEMALLOC contexts could * completely exhaust the page allocator. __GFP_NOMEMALLOC * stops emergency reserves from being allocated. * * TODO: this could cause a theoretical memory reclaim * deadlock in the swap out path. */ /* * Add it to the swap cache. */ err = add_to_swap_cache(folio, entry, __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL); if (err) /* * add_to_swap_cache() doesn't return -EEXIST, so we can safely * clear SWAP_HAS_CACHE flag. */ goto fail; /* * Normally the folio will be dirtied in unmap because its * pte should be dirty. A special case is MADV_FREE page. The * page's pte could have dirty bit cleared but the folio's * SwapBacked flag is still set because clearing the dirty bit * and SwapBacked flag has no lock protected. For such folio, * unmap will not set dirty bit for it, so folio reclaim will * not write the folio out. This can cause data corruption when * the folio is swapped in later. 
Always setting the dirty flag * for the folio solves the problem. */ folio_mark_dirty(folio); return true; fail: put_swap_folio(folio, entry); return false; } /* * This must be called only on folios that have * been verified to be in the swap cache and locked. * It will never put the folio into the free list, * the caller has a reference on the folio. */ void delete_from_swap_cache(struct folio *folio) { swp_entry_t entry = folio->swap; struct address_space *address_space = swap_address_space(entry); xa_lock_irq(&address_space->i_pages); __delete_from_swap_cache(folio, entry, NULL); xa_unlock_irq(&address_space->i_pages); put_swap_folio(folio, entry); folio_ref_sub(folio, folio_nr_pages(folio)); } void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end) { unsigned long curr = begin; void *old; for (;;) { swp_entry_t entry = swp_entry(type, curr); unsigned long index = curr & SWAP_ADDRESS_SPACE_MASK; struct address_space *address_space = swap_address_space(entry); XA_STATE(xas, &address_space->i_pages, index); xas_set_update(&xas, workingset_update_node); xa_lock_irq(&address_space->i_pages); xas_for_each(&xas, old, min(index + (end - curr), SWAP_ADDRESS_SPACE_PAGES)) { if (!xa_is_value(old)) continue; xas_store(&xas, NULL); } xa_unlock_irq(&address_space->i_pages); /* search the next swapcache until we meet end */ curr >>= SWAP_ADDRESS_SPACE_SHIFT; curr++; curr <<= SWAP_ADDRESS_SPACE_SHIFT; if (curr > end) break; } } /* * If we are the only user, then try to free up the swap cache. * * Its ok to check the swapcache flag without the folio lock * here because we are going to recheck again inside * folio_free_swap() _with_ the lock. * - Marcelo */ void free_swap_cache(struct folio *folio) { if (folio_test_swapcache(folio) && !folio_mapped(folio) && folio_trylock(folio)) { folio_free_swap(folio); folio_unlock(folio); } } /* * Perform a free_page(), also freeing any swap cache associated with * this page if it is the last user of the page. */ void free_page_and_swap_cache(struct page *page) { struct folio *folio = page_folio(page); free_swap_cache(folio); if (!is_huge_zero_folio(folio)) folio_put(folio); } /* * Passed an array of pages, drop them all from swapcache and then release * them. They are removed from the LRU and freed if this is their last use. */ void free_pages_and_swap_cache(struct encoded_page **pages, int nr) { struct folio_batch folios; unsigned int refs[PAGEVEC_SIZE]; folio_batch_init(&folios); for (int i = 0; i < nr; i++) { struct folio *folio = page_folio(encoded_page_ptr(pages[i])); free_swap_cache(folio); refs[folios.nr] = 1; if (unlikely(encoded_page_flags(pages[i]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)) refs[folios.nr] = encoded_nr_pages(pages[++i]); if (folio_batch_add(&folios, folio) == 0) folios_put_refs(&folios, refs); } if (folios.nr) folios_put_refs(&folios, refs); } static inline bool swap_use_vma_readahead(void) { return READ_ONCE(enable_vma_readahead) && !atomic_read(&nr_rotate_swap); } /* * Lookup a swap entry in the swap cache. A found folio will be returned * unlocked and with its refcount incremented - we rely on the kernel * lock getting page table operations atomic even if we drop the folio * lock before returning. * * Caller must lock the swap device or hold a reference to keep it valid. 
*/ struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { struct folio *folio; folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); if (!IS_ERR(folio)) { bool vma_ra = swap_use_vma_readahead(); bool readahead; /* * At the moment, we don't support PG_readahead for anon THP * so let's bail out rather than confusing the readahead stat. */ if (unlikely(folio_test_large(folio))) return folio; readahead = folio_test_clear_readahead(folio); if (vma && vma_ra) { unsigned long ra_val; int win, hits; ra_val = GET_SWAP_RA_VAL(vma); win = SWAP_RA_WIN(ra_val); hits = SWAP_RA_HITS(ra_val); if (readahead) hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); atomic_long_set(&vma->swap_readahead_info, SWAP_RA_VAL(addr, win, hits)); } if (readahead) { count_vm_event(SWAP_RA_HIT); if (!vma || !vma_ra) atomic_inc(&swapin_readahead_hits); } } else { folio = NULL; } return folio; } /** * filemap_get_incore_folio - Find and get a folio from the page or swap caches. * @mapping: The address_space to search. * @index: The page cache index. * * This differs from filemap_get_folio() in that it will also look for the * folio in the swap cache. * * Return: The found folio or %NULL. */ struct folio *filemap_get_incore_folio(struct address_space *mapping, pgoff_t index) { swp_entry_t swp; struct swap_info_struct *si; struct folio *folio = filemap_get_entry(mapping, index); if (!folio) return ERR_PTR(-ENOENT); if (!xa_is_value(folio)) return folio; if (!shmem_mapping(mapping)) return ERR_PTR(-ENOENT); swp = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. */ if (non_swap_entry(swp)) return ERR_PTR(-ENOENT); /* Prevent swapoff from happening to us */ si = get_swap_device(swp); if (!si) return ERR_PTR(-ENOENT); index = swap_cache_index(swp); folio = filemap_get_folio(swap_address_space(swp), index); put_swap_device(si); return folio; } struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated, bool skip_if_exists) { struct swap_info_struct *si; struct folio *folio; struct folio *new_folio = NULL; struct folio *result = NULL; void *shadow = NULL; *new_page_allocated = false; si = get_swap_device(entry); if (!si) return NULL; for (;;) { int err; /* * First check the swap cache. Since this is normally * called after swap_cache_get_folio() failed, re-calling * that would confuse statistics. */ folio = filemap_get_folio(swap_address_space(entry), swap_cache_index(entry)); if (!IS_ERR(folio)) goto got_folio; /* * Just skip read ahead for unused swap slot. * During swap_off when swap_slot_cache is disabled, * we have to handle the race between putting * swap entry in swap cache and marking swap slot * as SWAP_HAS_CACHE. That's done in later part of code or * else swap_off will be aborted if we return NULL. */ if (!swap_swapcount(si, entry) && swap_slot_cache_enabled) goto put_and_return; /* * Get a new folio to read into from swap. Allocate it now if * new_folio not exist, before marking swap_map SWAP_HAS_CACHE, * when -EEXIST will cause any racers to loop around until we * add it to cache. */ if (!new_folio) { new_folio = folio_alloc_mpol(gfp_mask, 0, mpol, ilx, numa_node_id()); if (!new_folio) goto put_and_return; } /* * Swap entry may have been freed since our caller observed it. 
*/ err = swapcache_prepare(entry, 1); if (!err) break; else if (err != -EEXIST) goto put_and_return; /* * Protect against a recursive call to __read_swap_cache_async() * on the same entry waiting forever here because SWAP_HAS_CACHE * is set but the folio is not the swap cache yet. This can * happen today if mem_cgroup_swapin_charge_folio() below * triggers reclaim through zswap, which may call * __read_swap_cache_async() in the writeback path. */ if (skip_if_exists) goto put_and_return; /* * We might race against __delete_from_swap_cache(), and * stumble across a swap_map entry whose SWAP_HAS_CACHE * has not yet been cleared. Or race against another * __read_swap_cache_async(), which has set SWAP_HAS_CACHE * in swap_map, but not yet added its folio to swap cache. */ schedule_timeout_uninterruptible(1); } /* * The swap entry is ours to swap in. Prepare the new folio. */ __folio_set_locked(new_folio); __folio_set_swapbacked(new_folio); if (mem_cgroup_swapin_charge_folio(new_folio, NULL, gfp_mask, entry)) goto fail_unlock; /* May fail (-ENOMEM) if XArray node allocation failed. */ if (add_to_swap_cache(new_folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow)) goto fail_unlock; mem_cgroup_swapin_uncharge_swap(entry, 1); if (shadow) workingset_refault(new_folio, shadow); /* Caller will initiate read into locked new_folio */ folio_add_lru(new_folio); *new_page_allocated = true; folio = new_folio; got_folio: result = folio; goto put_and_return; fail_unlock: put_swap_folio(new_folio, entry); folio_unlock(new_folio); put_and_return: put_swap_device(si); if (!(*new_page_allocated) && new_folio) folio_put(new_folio); return result; } /* * Locate a page of swap in physical memory, reserving swap cache space * and reading the disk if it is not already cached. * A failure return means that either the page allocation failed or that * the swap entry is no longer in use. * * get/put_swap_device() aren't needed to call this function, because * __read_swap_cache_async() call them and swap_read_folio() holds the * swap cache folio lock. */ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr, struct swap_iocb **plug) { bool page_allocated; struct mempolicy *mpol; pgoff_t ilx; struct folio *folio; mpol = get_vma_policy(vma, addr, 0, &ilx); folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); mpol_cond_put(mpol); if (page_allocated) swap_read_folio(folio, plug); return folio; } static unsigned int __swapin_nr_pages(unsigned long prev_offset, unsigned long offset, int hits, int max_pages, int prev_win) { unsigned int pages, last_ra; /* * This heuristic has been found to work well on both sequential and * random loads, swapping to hard disk or to SSD: please don't ask * what the "+ 2" means, it just happens to work well, that's all. */ pages = hits + 2; if (pages == 2) { /* * We can have no readahead hits to judge by: but must not get * stuck here forever, so check for an adjacent offset instead * (and don't even bother to check whether swap type is same). 
*/ if (offset != prev_offset + 1 && offset != prev_offset - 1) pages = 1; } else { unsigned int roundup = 4; while (roundup < pages) roundup <<= 1; pages = roundup; } if (pages > max_pages) pages = max_pages; /* Don't shrink readahead too fast */ last_ra = prev_win / 2; if (pages < last_ra) pages = last_ra; return pages; } static unsigned long swapin_nr_pages(unsigned long offset) { static unsigned long prev_offset; unsigned int hits, pages, max_pages; static atomic_t last_readahead_pages; max_pages = 1 << READ_ONCE(page_cluster); if (max_pages <= 1) return 1; hits = atomic_xchg(&swapin_readahead_hits, 0); pages = __swapin_nr_pages(READ_ONCE(prev_offset), offset, hits, max_pages, atomic_read(&last_readahead_pages)); if (!hits) WRITE_ONCE(prev_offset, offset); atomic_set(&last_readahead_pages, pages); return pages; } /** * swap_cluster_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory * @gfp_mask: memory allocation flags * @mpol: NUMA memory allocation policy to be applied * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE * * Returns the struct folio for entry and addr, after queueing swapin. * * Primitive swap readahead code. We simply read an aligned block of * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue * the 'original' request together with the readahead ones... * * Note: it is intentional that the same NUMA policy and interleave index * are used for every page of the readahead: neighbouring pages on swap * are fairly likely to have been swapped out from the same node. */ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t ilx) { struct folio *folio; unsigned long entry_offset = swp_offset(entry); unsigned long offset = entry_offset; unsigned long start_offset, end_offset; unsigned long mask; struct swap_info_struct *si = swp_swap_info(entry); struct blk_plug plug; struct swap_iocb *splug = NULL; bool page_allocated; mask = swapin_nr_pages(offset) - 1; if (!mask) goto skip; /* Read a page_cluster sized and aligned cluster around offset. */ start_offset = offset & ~mask; end_offset = offset | mask; if (!start_offset) /* First page is swap header. 
*/ start_offset++; if (end_offset >= si->max) end_offset = si->max - 1; blk_start_plug(&plug); for (offset = start_offset; offset <= end_offset ; offset++) { /* Ok, do the async read-ahead now */ folio = __read_swap_cache_async( swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx, &page_allocated, false); if (!folio) continue; if (page_allocated) { swap_read_folio(folio, &splug); if (offset != entry_offset) { folio_set_readahead(folio); count_vm_event(SWAP_RA); } } folio_put(folio); } blk_finish_plug(&plug); swap_read_unplug(splug); lru_add_drain(); /* Push any new pages onto the LRU now */ skip: /* The page was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); if (unlikely(page_allocated)) swap_read_folio(folio, NULL); return folio; } int init_swap_address_space(unsigned int type, unsigned long nr_pages) { struct address_space *spaces, *space; unsigned int i, nr; nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES); spaces = kvcalloc(nr, sizeof(struct address_space), GFP_KERNEL); if (!spaces) return -ENOMEM; for (i = 0; i < nr; i++) { space = spaces + i; xa_init_flags(&space->i_pages, XA_FLAGS_LOCK_IRQ); atomic_set(&space->i_mmap_writable, 0); space->a_ops = &swap_aops; /* swap cache doesn't use writeback related tags */ mapping_set_no_writeback_tags(space); } nr_swapper_spaces[type] = nr; swapper_spaces[type] = spaces; return 0; } void exit_swap_address_space(unsigned int type) { int i; struct address_space *spaces = swapper_spaces[type]; for (i = 0; i < nr_swapper_spaces[type]; i++) VM_WARN_ON_ONCE(!mapping_empty(&spaces[i])); kvfree(spaces); nr_swapper_spaces[type] = 0; swapper_spaces[type] = NULL; } static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start, unsigned long *end) { struct vm_area_struct *vma = vmf->vma; unsigned long ra_val; unsigned long faddr, prev_faddr, left, right; unsigned int max_win, hits, prev_win, win; max_win = 1 << min(READ_ONCE(page_cluster), SWAP_RA_ORDER_CEILING); if (max_win == 1) return 1; faddr = vmf->address; ra_val = GET_SWAP_RA_VAL(vma); prev_faddr = SWAP_RA_ADDR(ra_val); prev_win = SWAP_RA_WIN(ra_val); hits = SWAP_RA_HITS(ra_val); win = __swapin_nr_pages(PFN_DOWN(prev_faddr), PFN_DOWN(faddr), hits, max_win, prev_win); atomic_long_set(&vma->swap_readahead_info, SWAP_RA_VAL(faddr, win, 0)); if (win == 1) return 1; if (faddr == prev_faddr + PAGE_SIZE) left = faddr; else if (prev_faddr == faddr + PAGE_SIZE) left = faddr - (win << PAGE_SHIFT) + PAGE_SIZE; else left = faddr - (((win - 1) / 2) << PAGE_SHIFT); right = left + (win << PAGE_SHIFT); if ((long)left < 0) left = 0; *start = max3(left, vma->vm_start, faddr & PMD_MASK); *end = min3(right, vma->vm_end, (faddr & PMD_MASK) + PMD_SIZE); return win; } /** * swap_vma_readahead - swap in pages in hope we need them soon * @targ_entry: swap entry of the targeted memory * @gfp_mask: memory allocation flags * @mpol: NUMA memory allocation policy to be applied * @targ_ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE * @vmf: fault information * * Returns the struct folio for entry and addr, after queueing swapin. * * Primitive swap readahead code. We simply read in a few pages whose * virtual addresses are around the fault address in the same vma. * * Caller must hold read mmap_lock if vmf->vma is not NULL. 
* */ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, struct mempolicy *mpol, pgoff_t targ_ilx, struct vm_fault *vmf) { struct blk_plug plug; struct swap_iocb *splug = NULL; struct folio *folio; pte_t *pte = NULL, pentry; int win; unsigned long start, end, addr; swp_entry_t entry; pgoff_t ilx; bool page_allocated; win = swap_vma_ra_win(vmf, &start, &end); if (win == 1) goto skip; ilx = targ_ilx - PFN_DOWN(vmf->address - start); blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) { if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) break; } pentry = ptep_get_lockless(pte); if (!is_swap_pte(pentry)) continue; entry = pte_to_swp_entry(pentry); if (unlikely(non_swap_entry(entry))) continue; pte_unmap(pte); pte = NULL; folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); if (!folio) continue; if (page_allocated) { swap_read_folio(folio, &splug); if (addr != vmf->address) { folio_set_readahead(folio); count_vm_event(SWAP_RA); } } folio_put(folio); } if (pte) pte_unmap(pte); blk_finish_plug(&plug); swap_read_unplug(splug); lru_add_drain(); skip: /* The folio was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx, &page_allocated, false); if (unlikely(page_allocated)) swap_read_folio(folio, NULL); return folio; } /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory * @gfp_mask: memory allocation flags * @vmf: fault information * * Returns the struct folio for entry and addr, after queueing swapin. * * It's a main entry function for swap readahead. By the configuration, * it will read ahead blocks by cluster-based(ie, physical disk based) * or vma-based(ie, virtual address based on faulty address) readahead. */ struct folio *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_fault *vmf) { struct mempolicy *mpol; pgoff_t ilx; struct folio *folio; mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx); folio = swap_use_vma_readahead() ? swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf) : swap_cluster_readahead(entry, gfp_mask, mpol, ilx); mpol_cond_put(mpol); return folio; } #ifdef CONFIG_SYSFS static ssize_t vma_ra_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", str_true_false(enable_vma_readahead)); } static ssize_t vma_ra_enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { ssize_t ret; ret = kstrtobool(buf, &enable_vma_readahead); if (ret) return ret; return count; } static struct kobj_attribute vma_ra_enabled_attr = __ATTR_RW(vma_ra_enabled); static struct attribute *swap_attrs[] = { &vma_ra_enabled_attr.attr, NULL, }; static const struct attribute_group swap_attr_group = { .attrs = swap_attrs, }; static int __init swap_init_sysfs(void) { int err; struct kobject *swap_kobj; swap_kobj = kobject_create_and_add("swap", mm_kobj); if (!swap_kobj) { pr_err("failed to create swap kobject\n"); return -ENOMEM; } err = sysfs_create_group(swap_kobj, &swap_attr_group); if (err) { pr_err("failed to register swap group\n"); goto delete_obj; } return 0; delete_obj: kobject_put(swap_kobj); return err; } subsys_initcall(swap_init_sysfs); #endif |
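The VMA readahead state above packs the last faulting address, the readahead window and the hit count into a single long so it fits in one atomic_long_t on the VMA. The following standalone sketch mirrors the SWAP_RA_* macros from this file, assuming 4 KiB pages (PAGE_SHIFT 12) purely for the demonstration:

/* Build with: cc -o ra_pack ra_pack.c && ./ra_pack */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12UL				/* assumption: 4 KiB pages */
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_WIN_MASK  (~PAGE_MASK & ~SWAP_RA_HITS_MASK)

#define SWAP_RA_HITS(v) ((v) & SWAP_RA_HITS_MASK)
#define SWAP_RA_WIN(v)  (((v) & SWAP_RA_WIN_MASK) >> SWAP_RA_WIN_SHIFT)
#define SWAP_RA_ADDR(v) ((v) & PAGE_MASK)

#define SWAP_RA_VAL(addr, win, hits)				\
	(((addr) & PAGE_MASK) |					\
	 (((win) << SWAP_RA_WIN_SHIFT) & SWAP_RA_WIN_MASK) |	\
	 ((hits) & SWAP_RA_HITS_MASK))

int main(void)
{
	unsigned long addr = 0x7f1234567000UL;	/* page-aligned fault address */
	unsigned long val  = SWAP_RA_VAL(addr, 16, 3);

	/*
	 * All three fields share one word, so the per-VMA readahead info
	 * can be updated with a single atomic store, no extra locking.
	 */
	assert(SWAP_RA_ADDR(val) == addr);
	assert(SWAP_RA_WIN(val)  == 16);
	assert(SWAP_RA_HITS(val) == 3);
	printf("val=%#lx addr=%#lx win=%lu hits=%lu\n",
	       val, SWAP_RA_ADDR(val), SWAP_RA_WIN(val), SWAP_RA_HITS(val));
	return 0;
}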
32 32 21 21 21 21 37 37 36 21 21 17 12 12 21 37 4 7 13 6 2 12 1 1 1 24 24 5 5 5 19 4 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer FIFO * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> */ #include <sound/core.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include "seq_fifo.h" #include "seq_lock.h" /* FIFO */ /* create new fifo */ struct snd_seq_fifo *snd_seq_fifo_new(int poolsize) { struct snd_seq_fifo *f; f = kzalloc(sizeof(*f), GFP_KERNEL); if (!f) return NULL; f->pool = snd_seq_pool_new(poolsize); if (f->pool == NULL) { kfree(f); return NULL; } if (snd_seq_pool_init(f->pool) < 0) { snd_seq_pool_delete(&f->pool); kfree(f); return NULL; } spin_lock_init(&f->lock); snd_use_lock_init(&f->use_lock); init_waitqueue_head(&f->input_sleep); atomic_set(&f->overflow, 0); f->head = NULL; f->tail = NULL; f->cells = 0; return f; } void snd_seq_fifo_delete(struct snd_seq_fifo **fifo) { struct snd_seq_fifo *f; if (snd_BUG_ON(!fifo)) return; f = *fifo; if (snd_BUG_ON(!f)) return; *fifo = NULL; if (f->pool) snd_seq_pool_mark_closing(f->pool); snd_seq_fifo_clear(f); /* wake up clients if any */ if (waitqueue_active(&f->input_sleep)) wake_up(&f->input_sleep); /* release resources...*/ /*....................*/ if (f->pool) { snd_seq_pool_done(f->pool); snd_seq_pool_delete(&f->pool); } kfree(f); } static struct snd_seq_event_cell *fifo_cell_out(struct snd_seq_fifo *f); /* clear queue */ void snd_seq_fifo_clear(struct snd_seq_fifo *f) { struct snd_seq_event_cell *cell; /* clear overflow flag */ atomic_set(&f->overflow, 0); snd_use_lock_sync(&f->use_lock); guard(spinlock_irq)(&f->lock); /* drain the fifo */ while ((cell = fifo_cell_out(f)) != NULL) { snd_seq_cell_free(cell); } } /* enqueue event to fifo */ int snd_seq_fifo_event_in(struct snd_seq_fifo *f, struct snd_seq_event *event) { struct snd_seq_event_cell *cell; int err; if (snd_BUG_ON(!f)) return -EINVAL; snd_use_lock_use(&f->use_lock); err = snd_seq_event_dup(f->pool, event, &cell, 1, NULL, NULL); /* always non-blocking */ if (err < 0) { if ((err == -ENOMEM) || (err == -EAGAIN)) atomic_inc(&f->overflow); snd_use_lock_free(&f->use_lock); return err; } /* append new cells to fifo */ scoped_guard(spinlock_irqsave, &f->lock) { if (f->tail != NULL) f->tail->next = cell; f->tail = cell; if (f->head == NULL) f->head = cell; cell->next = NULL; f->cells++; } /* wakeup client */ if (waitqueue_active(&f->input_sleep)) wake_up(&f->input_sleep); snd_use_lock_free(&f->use_lock); return 0; /* success */ } /* dequeue cell from fifo */ 
static struct snd_seq_event_cell *fifo_cell_out(struct snd_seq_fifo *f) { struct snd_seq_event_cell *cell; cell = f->head; if (cell) { f->head = cell->next; /* reset tail if this was the last element */ if (f->tail == cell) f->tail = NULL; cell->next = NULL; f->cells--; } return cell; } /* dequeue cell from fifo and copy on user space */ int snd_seq_fifo_cell_out(struct snd_seq_fifo *f, struct snd_seq_event_cell **cellp, int nonblock) { struct snd_seq_event_cell *cell; unsigned long flags; wait_queue_entry_t wait; if (snd_BUG_ON(!f)) return -EINVAL; *cellp = NULL; init_waitqueue_entry(&wait, current); spin_lock_irqsave(&f->lock, flags); while ((cell = fifo_cell_out(f)) == NULL) { if (nonblock) { /* non-blocking - return immediately */ spin_unlock_irqrestore(&f->lock, flags); return -EAGAIN; } set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&f->input_sleep, &wait); spin_unlock_irqrestore(&f->lock, flags); schedule(); spin_lock_irqsave(&f->lock, flags); remove_wait_queue(&f->input_sleep, &wait); if (signal_pending(current)) { spin_unlock_irqrestore(&f->lock, flags); return -ERESTARTSYS; } } spin_unlock_irqrestore(&f->lock, flags); *cellp = cell; return 0; } void snd_seq_fifo_cell_putback(struct snd_seq_fifo *f, struct snd_seq_event_cell *cell) { if (cell) { guard(spinlock_irqsave)(&f->lock); cell->next = f->head; f->head = cell; if (!f->tail) f->tail = cell; f->cells++; } } /* polling; return non-zero if queue is available */ int snd_seq_fifo_poll_wait(struct snd_seq_fifo *f, struct file *file, poll_table *wait) { poll_wait(file, &f->input_sleep, wait); return (f->cells > 0); } /* change the size of pool; all old events are removed */ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) { struct snd_seq_pool *newpool, *oldpool; struct snd_seq_event_cell *cell, *next, *oldhead; if (snd_BUG_ON(!f || !f->pool)) return -EINVAL; /* allocate new pool */ newpool = snd_seq_pool_new(poolsize); if (newpool == NULL) return -ENOMEM; if (snd_seq_pool_init(newpool) < 0) { snd_seq_pool_delete(&newpool); return -ENOMEM; } scoped_guard(spinlock_irq, &f->lock) { /* remember old pool */ oldpool = f->pool; oldhead = f->head; /* exchange pools */ f->pool = newpool; f->head = NULL; f->tail = NULL; f->cells = 0; /* NOTE: overflow flag is not cleared */ } /* close the old pool and wait until all users are gone */ snd_seq_pool_mark_closing(oldpool); snd_use_lock_sync(&f->use_lock); /* release cells in old pool */ for (cell = oldhead; cell; cell = next) { next = cell->next; snd_seq_cell_free(cell); } snd_seq_pool_delete(&oldpool); return 0; } /* get the number of unused cells safely */ int snd_seq_fifo_unused_cells(struct snd_seq_fifo *f) { int cells; if (!f) return 0; snd_use_lock_use(&f->use_lock); scoped_guard(spinlock_irqsave, &f->lock) cells = snd_seq_unused_cells(f->pool); snd_use_lock_free(&f->use_lock); return cells; } |
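A minimal usage sketch of this FIFO, roughly as a sequencer client would drive it (illustrative only: it assumes the pool helpers from seq_memory.h such as snd_seq_cell_free() are available, and error handling is trimmed):

static int example_fifo_round_trip(struct snd_seq_event *ev)
{
	struct snd_seq_fifo *f;
	struct snd_seq_event_cell *cell;
	int err;

	f = snd_seq_fifo_new(64);		/* pool of 64 cells */
	if (!f)
		return -ENOMEM;

	err = snd_seq_fifo_event_in(f, ev);	/* duplicates *ev into a cell */
	if (err < 0)
		goto out;

	/* Non-blocking dequeue; pass 0 instead to sleep until data arrives. */
	err = snd_seq_fifo_cell_out(f, &cell, 1);
	if (err < 0)
		goto out;

	/* ... consume the dequeued cell, then return it to the pool ... */
	snd_seq_cell_free(cell);
out:
	snd_seq_fifo_delete(&f);		/* clears and frees everything */
	return err;
}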
// SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2002 Intel Corp. * * This file is part of the SCTP kernel implementation * * Sysctl related interfaces for SCTP.
* * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Mingqin Liu <liuming@us.ibm.com> * Jon Grimm <jgrimm@us.ibm.com> * Ardelle Fan <ardelle.fan@intel.com> * Ryan Layer <rmlayer@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <net/sctp/structs.h> #include <net/sctp/sctp.h> #include <linux/sysctl.h> static int timer_max = 86400000; /* ms in one day */ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; static int rwnd_scale_max = 16; static int rto_alpha_min = 0; static int rto_beta_min = 0; static int rto_alpha_max = 1000; static int rto_beta_max = 1000; static int pf_expose_max = SCTP_PF_EXPOSE_MAX; static int ps_retrans_max = SCTP_PS_RETRANS_MAX; static int udp_port_max = 65535; static unsigned long max_autoclose_min = 0; static unsigned long max_autoclose_max = (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), .mode = 0644, .proc_handler = proc_doulongvec_minmax }, { .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), .mode = 0644, .proc_handler = proc_dointvec, }, }; /* The following index defines are used in sctp_sysctl_net_register(). * If you add new items to the sctp_net_table, please ensure that * the index values of these defines hold the same meaning indicated by * their macro names when they appear in sctp_net_table. 
*/ #define SCTP_RTO_MIN_IDX 0 #define SCTP_RTO_MAX_IDX 1 #define SCTP_PF_RETRANS_IDX 2 #define SCTP_PS_RETRANS_IDX 3 static struct ctl_table sctp_net_table[] = { [SCTP_RTO_MIN_IDX] = { .procname = "rto_min", .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_min, .extra1 = SYSCTL_ONE, .extra2 = &init_net.sctp.rto_max }, [SCTP_RTO_MAX_IDX] = { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_sctp_do_rto_max, .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, [SCTP_PF_RETRANS_IDX] = { .procname = "pf_retrans", .data = &init_net.sctp.pf_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &init_net.sctp.ps_retrans, }, [SCTP_PS_RETRANS_IDX] = { .procname = "ps_retrans", .data = &init_net.sctp.ps_retrans, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &init_net.sctp.pf_retrans, .extra2 = &ps_retrans_max, }, { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "rto_alpha_exp_divisor", .data = &init_net.sctp.rto_alpha, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_alpha_min, .extra2 = &rto_alpha_max, }, { .procname = "rto_beta_exp_divisor", .data = &init_net.sctp.rto_beta, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_alpha_beta, .extra1 = &rto_beta_min, .extra2 = &rto_beta_max, }, { .procname = "max_burst", .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, { .procname = "cookie_preserve_enable", .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "cookie_hmac_alg", .data = &init_net.sctp.sctp_hmac_alg, .maxlen = 8, .mode = 0644, .proc_handler = proc_sctp_do_hmac_alg, }, { .procname = "valid_cookie_life", .data = &init_net.sctp.valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "sack_timeout", .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { .procname = "hb_interval", .data = &init_net.sctp.hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &timer_max }, { .procname = "association_max_retrans", .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "path_max_retrans", .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "max_init_retransmits", .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, { .procname = "sndbuf_policy", .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "rcvbuf_policy", .data = 
&init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_enable", .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addip_noauth_enable", .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "prsctp_enable", .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "reconf_enable", .data = &init_net.sctp.reconf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "auth_enable", .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_auth, }, { .procname = "intl_enable", .data = &init_net.sctp.intl_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ecn_enable", .data = &init_net.sctp.ecn_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "plpmtud_probe_interval", .data = &init_net.sctp.probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_probe_interval, }, { .procname = "udp_port", .data = &init_net.sctp.udp_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_sctp_do_udp_port, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "encap_port", .data = &init_net.sctp.encap_port, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &udp_port_max, }, { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &addr_scope_max, }, { .procname = "rwnd_update_shift", .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, .extra1 = SYSCTL_ONE, .extra2 = &rwnd_scale_max, }, { .procname = "max_autoclose", .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, #ifdef CONFIG_NET_L3_MASTER_DEV { .procname = "l3mdev_accept", .data = &init_net.sctp.l3mdev_accept, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, #endif { .procname = "pf_enable", .data = &init_net.sctp.pf_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "pf_expose", .data = &init_net.sctp.pf_expose, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &pf_expose_max, }, }; static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.sctp_hmac_alg); struct ctl_table tbl; bool changed = false; char *none = "none"; char tmp[8] = {0}; int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? 
: none; tbl.maxlen = strlen(tbl.data); } ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; changed = true; } if (!changed) ret = -EINVAL; } return ret; } static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.rto_min); unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_min; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_min = new_value; } return ret; } static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.rto_max); unsigned int min = *(unsigned int *) ctl->extra1; unsigned int max = *(unsigned int *) ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.rto_max; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value > max || new_value < min) return -EINVAL; net->sctp.rto_max = new_value; } return ret; } static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { if (write) pr_warn_once("Changing rto_alpha or rto_beta may lead to " "suboptimal rtt/srtt estimations!\n"); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); } static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.auth_enable); struct ctl_table tbl; int new_value, ret; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.auth_enable; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; net->sctp.auth_enable = new_value; /* Update the value in the control socket */ lock_sock(sk); sctp_sk(sk)->ep->auth_enable = new_value; release_sock(sk); } return ret; } static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.udp_port); unsigned int min = *(unsigned int *)ctl->extra1; unsigned int max = *(unsigned int *)ctl->extra2; struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.udp_port; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { struct sock *sk = net->sctp.ctl_sock; if (new_value > max || new_value < min) return -EINVAL; net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { ret = sctp_udp_sock_start(net); if (ret) net->sctp.udp_port = 0; } /* Update the 
value in the control socket */ lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); } return ret; } static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, sctp.probe_interval); struct ctl_table tbl; int ret, new_value; memset(&tbl, 0, sizeof(struct ctl_table)); tbl.maxlen = sizeof(unsigned int); if (write) tbl.data = &new_value; else tbl.data = &net->sctp.probe_interval; ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { if (new_value && new_value < SCTP_PROBE_TIMER_MIN) return -EINVAL; net->sctp.probe_interval = new_value; } return ret; } int sctp_sysctl_net_register(struct net *net) { size_t table_size = ARRAY_SIZE(sctp_net_table); struct ctl_table *table; int i; table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); if (!table) return -ENOMEM; for (i = 0; i < table_size; i++) table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; table[SCTP_RTO_MIN_IDX].extra2 = &net->sctp.rto_max; table[SCTP_RTO_MAX_IDX].extra1 = &net->sctp.rto_min; table[SCTP_PF_RETRANS_IDX].extra2 = &net->sctp.ps_retrans; table[SCTP_PS_RETRANS_IDX].extra1 = &net->sctp.pf_retrans; net->sctp.sysctl_header = register_net_sysctl_sz(net, "net/sctp", table, table_size); if (net->sctp.sysctl_header == NULL) { kfree(table); return -ENOMEM; } return 0; } void sctp_sysctl_net_unregister(struct net *net) { const struct ctl_table *table; table = net->sctp.sysctl_header->ctl_table_arg; unregister_net_sysctl_table(net->sctp.sysctl_header); kfree(table); } static struct ctl_table_header *sctp_sysctl_header; /* Sysctl registration. */ void sctp_sysctl_register(void) { sctp_sysctl_header = register_net_sysctl(&init_net, "net/sctp", sctp_table); } /* Sysctl deregistration. */ void sctp_sysctl_unregister(void) { unregister_net_sysctl_table(sctp_sysctl_header); } |
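/*
 * Illustration: a minimal userspace sketch of how the per-net handlers
 * above are normally exercised, via /proc/sys/net/sctp/.  The value
 * written below is only an example; proc_sctp_do_rto_min() rejects
 * anything outside the window bounded above by rto_max with -EINVAL,
 * and the sketch assumes an SCTP-enabled kernel and root privileges.
 */
#include <stdio.h>

static int read_sysctl(const char *path)
{
	FILE *f = fopen(path, "r");
	int val = -1;

	if (f) {
		if (fscanf(f, "%d", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	int rto_min = read_sysctl("/proc/sys/net/sctp/rto_min");
	int rto_max = read_sysctl("/proc/sys/net/sctp/rto_max");
	FILE *f;

	printf("rto_min=%d rto_max=%d (milliseconds)\n", rto_min, rto_max);

	/* A write >= rto_max would be refused by proc_sctp_do_rto_min(). */
	f = fopen("/proc/sys/net/sctp/rto_min", "w");
	if (f) {
		fprintf(f, "%d\n", 900);	/* example value */
		fclose(f);
	}
	return 0;
}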
/*
 * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/poll.h>
#include <net/sock.h>

#include "rds.h"

/* this is just used for stats gathering :/ */
static DEFINE_SPINLOCK(rds_sock_lock);
static unsigned long rds_sock_count;
static LIST_HEAD(rds_sock_list);
DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);

/*
 * This is called as the final descriptor referencing this socket is closed.
 * We have to unbind the socket so that another socket can be bound to the
 * address it was using.
 *
 * We have to be careful about racing with the incoming path.  sock_orphan()
 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
 * messages shouldn't be queued.
 */
static int rds_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct rds_sock *rs;

	if (!sk)
		goto out;

	rs = rds_sk_to_rs(sk);

	sock_orphan(sk);
	/* Note - rds_clear_recv_queue grabs rs_recv_lock, so
	 * that ensures the recv path has completed messing
	 * with the socket. */
	rds_clear_recv_queue(rs);
	rds_cong_remove_socket(rs);
	rds_remove_bound(rs);
	rds_send_drop_to(rs, NULL);
	rds_rdma_drop_keys(rs);
	rds_notify_queue_get(rs, NULL);
	rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);

	spin_lock_bh(&rds_sock_lock);
	list_del_init(&rs->rs_item);
	rds_sock_count--;
	spin_unlock_bh(&rds_sock_lock);

	rds_trans_put(rs->rs_transport);

	sock->sk = NULL;
	sock_put(sk);
out:
	return 0;
}

/*
 * Careful not to race with rds_release -> sock_orphan which clears sk_sleep.
 * _bh() isn't OK here, we're called from interrupt handlers.  It's probably OK
 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
 * this seems more conservative.
* NB - normally, one would use sk_callback_lock for this, but we can * get here from interrupts, whereas the network code grabs sk_callback_lock * with _lock_bh only - so relying on sk_callback_lock introduces livelocks. */ void rds_wake_sk_sleep(struct rds_sock *rs) { unsigned long flags; read_lock_irqsave(&rs->rs_recv_lock, flags); __rds_wake_sk_sleep(rds_rs_to_sk(rs)); read_unlock_irqrestore(&rs->rs_recv_lock, flags); } static int rds_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; int uaddr_len; /* racey, don't care */ if (peer) { if (ipv6_addr_any(&rs->rs_conn_addr)) return -ENOTCONN; if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { sin = (struct sockaddr_in *)uaddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); sin->sin_family = AF_INET; sin->sin_port = rs->rs_conn_port; sin->sin_addr.s_addr = rs->rs_conn_addr_v4; uaddr_len = sizeof(*sin); } else { sin6 = (struct sockaddr_in6 *)uaddr; sin6->sin6_family = AF_INET6; sin6->sin6_port = rs->rs_conn_port; sin6->sin6_addr = rs->rs_conn_addr; sin6->sin6_flowinfo = 0; /* scope_id is the same as in the bound address. */ sin6->sin6_scope_id = rs->rs_bound_scope_id; uaddr_len = sizeof(*sin6); } } else { /* If socket is not yet bound and the socket is connected, * set the return address family to be the same as the * connected address, but with 0 address value. If it is not * connected, set the family to be AF_UNSPEC (value 0) and * the address size to be that of an IPv4 address. */ if (ipv6_addr_any(&rs->rs_bound_addr)) { if (ipv6_addr_any(&rs->rs_conn_addr)) { sin = (struct sockaddr_in *)uaddr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_UNSPEC; return sizeof(*sin); } #if IS_ENABLED(CONFIG_IPV6) if (!(ipv6_addr_type(&rs->rs_conn_addr) & IPV6_ADDR_MAPPED)) { sin6 = (struct sockaddr_in6 *)uaddr; memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; return sizeof(*sin6); } #endif sin = (struct sockaddr_in *)uaddr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; return sizeof(*sin); } if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { sin = (struct sockaddr_in *)uaddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); sin->sin_family = AF_INET; sin->sin_port = rs->rs_bound_port; sin->sin_addr.s_addr = rs->rs_bound_addr_v4; uaddr_len = sizeof(*sin); } else { sin6 = (struct sockaddr_in6 *)uaddr; sin6->sin6_family = AF_INET6; sin6->sin6_port = rs->rs_bound_port; sin6->sin6_addr = rs->rs_bound_addr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = rs->rs_bound_scope_id; uaddr_len = sizeof(*sin6); } } return uaddr_len; } /* * RDS' poll is without a doubt the least intuitive part of the interface, * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from * a network protocol. * * EPOLLIN is asserted if * - there is data on the receive queue. * - to signal that a previously congested destination may have become * uncongested * - A notification has been queued to the socket (this can be a congestion * update, or a RDMA completion, or a MSG_ZEROCOPY completion). * * EPOLLOUT is asserted if there is room on the send queue. This does not mean * however, that the next sendmsg() call will succeed. If the application tries * to send to a congested destination, the system call may still fail (and * return ENOBUFS). 
*/ static __poll_t rds_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct rds_sock *rs = rds_sk_to_rs(sk); __poll_t mask = 0; unsigned long flags; poll_wait(file, sk_sleep(sk), wait); if (rs->rs_seen_congestion) poll_wait(file, &rds_poll_waitq, wait); read_lock_irqsave(&rs->rs_recv_lock, flags); if (!rs->rs_cong_monitor) { /* When a congestion map was updated, we signal EPOLLIN for * "historical" reasons. Applications can also poll for * WRBAND instead. */ if (rds_cong_updated_since(&rs->rs_cong_track)) mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND); } else { spin_lock(&rs->rs_lock); if (rs->rs_cong_notify) mask |= (EPOLLIN | EPOLLRDNORM); spin_unlock(&rs->rs_lock); } if (!list_empty(&rs->rs_recv_queue) || !list_empty(&rs->rs_notify_queue) || !list_empty(&rs->rs_zcookie_queue.zcookie_head)) mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR; read_unlock_irqrestore(&rs->rs_recv_lock, flags); /* clear state any time we wake a seen-congested socket */ if (mask) rs->rs_seen_congestion = 0; return mask; } static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); rds_tos_t utos, tos = 0; switch (cmd) { case SIOCRDSSETTOS: if (get_user(utos, (rds_tos_t __user *)arg)) return -EFAULT; if (rs->rs_transport && rs->rs_transport->get_tos_map) tos = rs->rs_transport->get_tos_map(utos); else return -ENOIOCTLCMD; spin_lock_bh(&rds_sock_lock); if (rs->rs_tos || rs->rs_conn) { spin_unlock_bh(&rds_sock_lock); return -EINVAL; } rs->rs_tos = tos; spin_unlock_bh(&rds_sock_lock); break; case SIOCRDSGETTOS: spin_lock_bh(&rds_sock_lock); tos = rs->rs_tos; spin_unlock_bh(&rds_sock_lock); if (put_user(tos, (rds_tos_t __user *)arg)) return -EFAULT; break; default: return -ENOIOCTLCMD; } return 0; } static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len) { struct sockaddr_in6 sin6; struct sockaddr_in sin; int ret = 0; /* racing with another thread binding seems ok here */ if (ipv6_addr_any(&rs->rs_bound_addr)) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } if (len < sizeof(struct sockaddr_in)) { ret = -EINVAL; goto out; } else if (len < sizeof(struct sockaddr_in6)) { /* Assume IPv4 */ if (copy_from_sockptr(&sin, optval, sizeof(struct sockaddr_in))) { ret = -EFAULT; goto out; } ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); sin6.sin6_port = sin.sin_port; } else { if (copy_from_sockptr(&sin6, optval, sizeof(struct sockaddr_in6))) { ret = -EFAULT; goto out; } } rds_send_drop_to(rs, &sin6); out: return ret; } static int rds_set_bool_option(unsigned char *optvar, sockptr_t optval, int optlen) { int value; if (optlen < sizeof(int)) return -EINVAL; if (copy_from_sockptr(&value, optval, sizeof(int))) return -EFAULT; *optvar = !!value; return 0; } static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen) { int ret; ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen); if (ret == 0) { if (rs->rs_cong_monitor) { rds_cong_add_socket(rs); } else { rds_cong_remove_socket(rs); rs->rs_cong_mask = 0; rs->rs_cong_notify = 0; } } return ret; } static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) { int t_type; if (rs->rs_transport) return -EOPNOTSUPP; /* previously attached to transport */ if (optlen != sizeof(int)) return -EINVAL; if (copy_from_sockptr(&t_type, optval, sizeof(t_type))) 
return -EFAULT; if (t_type < 0 || t_type >= RDS_TRANS_COUNT) return -EINVAL; rs->rs_transport = rds_trans_get(t_type); return rs->rs_transport ? 0 : -ENOPROTOOPT; } static int rds_enable_recvtstamp(struct sock *sk, sockptr_t optval, int optlen, int optname) { int val, valbool; if (optlen != sizeof(int)) return -EFAULT; if (copy_from_sockptr(&val, optval, sizeof(int))) return -EFAULT; valbool = val ? 1 : 0; if (optname == SO_TIMESTAMP_NEW) sock_set_flag(sk, SOCK_TSTAMP_NEW); if (valbool) sock_set_flag(sk, SOCK_RCVTSTAMP); else sock_reset_flag(sk, SOCK_RCVTSTAMP); return 0; } static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, int optlen) { struct rds_rx_trace_so trace; int i; if (optlen != sizeof(struct rds_rx_trace_so)) return -EFAULT; if (copy_from_sockptr(&trace, optval, sizeof(trace))) return -EFAULT; if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX) return -EFAULT; rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } rs->rs_rx_trace[i] = trace.rx_trace_pos[i]; } return 0; } static int rds_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); int ret; if (level != SOL_RDS) { ret = -ENOPROTOOPT; goto out; } switch (optname) { case RDS_CANCEL_SENT_TO: ret = rds_cancel_sent_to(rs, optval, optlen); break; case RDS_GET_MR: ret = rds_get_mr(rs, optval, optlen); break; case RDS_GET_MR_FOR_DEST: ret = rds_get_mr_for_dest(rs, optval, optlen); break; case RDS_FREE_MR: ret = rds_free_mr(rs, optval, optlen); break; case RDS_RECVERR: ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen); break; case RDS_CONG_MONITOR: ret = rds_cong_monitor(rs, optval, optlen); break; case SO_RDS_TRANSPORT: lock_sock(sock->sk); ret = rds_set_transport(rs, optval, optlen); release_sock(sock->sk); break; case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: lock_sock(sock->sk); ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname); release_sock(sock->sk); break; case SO_RDS_MSG_RXPATH_LATENCY: ret = rds_recv_track_latency(rs, optval, optlen); break; default: ret = -ENOPROTOOPT; } out: return ret; } static int rds_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); int ret = -ENOPROTOOPT, len; int trans; if (level != SOL_RDS) goto out; if (get_user(len, optlen)) { ret = -EFAULT; goto out; } switch (optname) { case RDS_INFO_FIRST ... RDS_INFO_LAST: ret = rds_info_getsockopt(sock, optname, optval, optlen); break; case RDS_RECVERR: if (len < sizeof(int)) ret = -EINVAL; else if (put_user(rs->rs_recverr, (int __user *) optval) || put_user(sizeof(int), optlen)) ret = -EFAULT; else ret = 0; break; case SO_RDS_TRANSPORT: if (len < sizeof(int)) { ret = -EINVAL; break; } trans = (rs->rs_transport ? 
rs->rs_transport->t_type : RDS_TRANS_NONE); /* unbound */ if (put_user(trans, (int __user *)optval) || put_user(sizeof(int), optlen)) ret = -EFAULT; else ret = 0; break; default: break; } out: return ret; } static int rds_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_in *sin; struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; if (addr_len < offsetofend(struct sockaddr, sa_family)) return -EINVAL; lock_sock(sk); switch (uaddr->sa_family) { case AF_INET: sin = (struct sockaddr_in *)uaddr; if (addr_len < sizeof(struct sockaddr_in)) { ret = -EINVAL; break; } if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { ret = -EDESTADDRREQ; break; } if (ipv4_is_multicast(sin->sin_addr.s_addr) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { ret = -EINVAL; break; } ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); rs->rs_conn_port = sin->sin_port; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct sockaddr_in6 *sin6; int addr_type; sin6 = (struct sockaddr_in6 *)uaddr; if (addr_len < sizeof(struct sockaddr_in6)) { ret = -EINVAL; break; } addr_type = ipv6_addr_type(&sin6->sin6_addr); if (!(addr_type & IPV6_ADDR_UNICAST)) { __be32 addr4; if (!(addr_type & IPV6_ADDR_MAPPED)) { ret = -EPROTOTYPE; break; } /* It is a mapped address. Need to do some sanity * checks. */ addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || ipv4_is_multicast(addr4)) { ret = -EPROTOTYPE; break; } } if (addr_type & IPV6_ADDR_LINKLOCAL) { /* If socket is arleady bound to a link local address, * the peer address must be on the same link. */ if (sin6->sin6_scope_id == 0 || (!ipv6_addr_any(&rs->rs_bound_addr) && rs->rs_bound_scope_id && sin6->sin6_scope_id != rs->rs_bound_scope_id)) { ret = -EINVAL; break; } /* Remember the connected address scope ID. It will * be checked against the binding local address when * the socket is bound. 
*/ rs->rs_bound_scope_id = sin6->sin6_scope_id; } rs->rs_conn_addr = sin6->sin6_addr; rs->rs_conn_port = sin6->sin6_port; break; } #endif default: ret = -EAFNOSUPPORT; break; } release_sock(sk); return ret; } static struct proto rds_proto = { .name = "RDS", .owner = THIS_MODULE, .obj_size = sizeof(struct rds_sock), }; static const struct proto_ops rds_proto_ops = { .family = AF_RDS, .owner = THIS_MODULE, .release = rds_release, .bind = rds_bind, .connect = rds_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = rds_getname, .poll = rds_poll, .ioctl = rds_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = rds_setsockopt, .getsockopt = rds_getsockopt, .sendmsg = rds_sendmsg, .recvmsg = rds_recvmsg, .mmap = sock_no_mmap, }; static void rds_sock_destruct(struct sock *sk) { struct rds_sock *rs = rds_sk_to_rs(sk); WARN_ON((&rs->rs_item != rs->rs_item.next || &rs->rs_item != rs->rs_item.prev)); } static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { struct rds_sock *rs; sock_init_data(sock, sk); sock->ops = &rds_proto_ops; sk->sk_protocol = protocol; sk->sk_destruct = rds_sock_destruct; rs = rds_sk_to_rs(sk); spin_lock_init(&rs->rs_lock); rwlock_init(&rs->rs_recv_lock); INIT_LIST_HEAD(&rs->rs_send_queue); INIT_LIST_HEAD(&rs->rs_recv_queue); INIT_LIST_HEAD(&rs->rs_notify_queue); INIT_LIST_HEAD(&rs->rs_cong_list); rds_message_zcopy_queue_init(&rs->rs_zcookie_queue); spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; rs->rs_tos = 0; rs->rs_conn = NULL; spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); rds_sock_count++; spin_unlock_bh(&rds_sock_lock); return 0; } static int rds_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern); if (!sk) return -ENOMEM; return __rds_create(sock, sk, protocol); } void rds_sock_addref(struct rds_sock *rs) { sock_hold(rds_rs_to_sk(rs)); } void rds_sock_put(struct rds_sock *rs) { sock_put(rds_rs_to_sk(rs)); } static const struct net_proto_family rds_family_ops = { .family = AF_RDS, .create = rds_create, .owner = THIS_MODULE, }; static void rds_sock_inc_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_sock *rs; struct rds_incoming *inc; unsigned int total = 0; len /= sizeof(struct rds_info_message); spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { /* This option only supports IPv4 sockets. */ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) continue; read_lock(&rs->rs_recv_lock); /* XXX too lazy to maintain counts.. 
*/ list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { total++; if (total <= len) rds_inc_info_copy(inc, iter, inc->i_saddr.s6_addr32[3], rs->rs_bound_addr_v4, 1); } read_unlock(&rs->rs_recv_lock); } spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds_info_message); } #if IS_ENABLED(CONFIG_IPV6) static void rds6_sock_inc_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_incoming *inc; unsigned int total = 0; struct rds_sock *rs; len /= sizeof(struct rds6_info_message); spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { read_lock(&rs->rs_recv_lock); list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { total++; if (total <= len) rds6_inc_info_copy(inc, iter, &inc->i_saddr, &rs->rs_bound_addr, 1); } read_unlock(&rs->rs_recv_lock); } spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds6_info_message); } #endif static void rds_sock_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_info_socket sinfo; unsigned int cnt = 0; struct rds_sock *rs; len /= sizeof(struct rds_info_socket); spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) { cnt = rds_sock_count; goto out; } list_for_each_entry(rs, &rds_sock_list, rs_item) { /* This option only supports IPv4 sockets. */ if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) continue; sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); sinfo.bound_addr = rs->rs_bound_addr_v4; sinfo.connected_addr = rs->rs_conn_addr_v4; sinfo.bound_port = rs->rs_bound_port; sinfo.connected_port = rs->rs_conn_port; sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); rds_info_copy(iter, &sinfo, sizeof(sinfo)); cnt++; } out: lens->nr = cnt; lens->each = sizeof(struct rds_info_socket); spin_unlock_bh(&rds_sock_lock); } #if IS_ENABLED(CONFIG_IPV6) static void rds6_sock_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds6_info_socket sinfo6; struct rds_sock *rs; len /= sizeof(struct rds6_info_socket); spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) goto out; list_for_each_entry(rs, &rds_sock_list, rs_item) { sinfo6.sndbuf = rds_sk_sndbuf(rs); sinfo6.rcvbuf = rds_sk_rcvbuf(rs); sinfo6.bound_addr = rs->rs_bound_addr; sinfo6.connected_addr = rs->rs_conn_addr; sinfo6.bound_port = rs->rs_bound_port; sinfo6.connected_port = rs->rs_conn_port; sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs)); rds_info_copy(iter, &sinfo6, sizeof(sinfo6)); } out: lens->nr = rds_sock_count; lens->each = sizeof(struct rds6_info_socket); spin_unlock_bh(&rds_sock_lock); } #endif static void rds_exit(void) { sock_unregister(rds_family_ops.family); proto_unregister(&rds_proto); rds_conn_exit(); rds_cong_exit(); rds_sysctl_exit(); rds_threads_exit(); rds_stats_exit(); rds_page_exit(); rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info); rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); #endif } module_exit(rds_exit); u32 rds_gen_num; static int __init rds_init(void) { int ret; net_get_random_once(&rds_gen_num, sizeof(rds_gen_num)); ret = rds_bind_lock_init(); if (ret) goto out; ret = rds_conn_init(); if (ret) goto out_bind; ret = rds_threads_init(); if (ret) goto out_conn; ret = rds_sysctl_init(); if (ret) 
goto out_threads; ret = rds_stats_init(); if (ret) goto out_sysctl; ret = proto_register(&rds_proto, 1); if (ret) goto out_stats; ret = sock_register(&rds_family_ops); if (ret) goto out_proto; rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); #if IS_ENABLED(CONFIG_IPV6) rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info); rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); #endif goto out; out_proto: proto_unregister(&rds_proto); out_stats: rds_stats_exit(); out_sysctl: rds_sysctl_exit(); out_threads: rds_threads_exit(); out_conn: rds_conn_exit(); rds_cong_exit(); rds_page_exit(); out_bind: rds_bind_lock_destroy(); out: return ret; } module_init(rds_init); #define DRV_VERSION "4.0" #define DRV_RELDATE "Feb 12, 2009" MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets" " v" DRV_VERSION " (" DRV_RELDATE ")"); MODULE_VERSION(DRV_VERSION); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_NETPROTO(PF_RDS); |
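/*
 * Illustration: a minimal userspace sketch of the socket interface that
 * rds_create()/rds_proto_ops register.  RDS only accepts SOCK_SEQPACKET
 * with protocol 0; the port, address and SO_RDS_TRANSPORT value below are
 * example assumptions and need a kernel with the matching RDS transport.
 */
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/rds.h>

int main(void)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_port = htons(4000),	/* example port */
	};
	int fd, transport = RDS_TRANS_TCP;	/* example transport */

	fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
	if (fd < 0) {
		perror("socket(AF_RDS)");
		return 1;
	}

	/* Optional: pin the transport before binding (see rds_set_transport()). */
	if (setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, &transport,
		       sizeof(transport)) < 0)
		perror("SO_RDS_TRANSPORT");

	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);	/* example local IP */
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		perror("bind");

	close(fd);
	return 0;
}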
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_USER_NAMESPACE_H
#define _LINUX_USER_NAMESPACE_H

#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/rwsem.h>
#include <linux/sysctl.h>
#include <linux/err.h>

#define UID_GID_MAP_MAX_BASE_EXTENTS 5
#define UID_GID_MAP_MAX_EXTENTS 340

struct uid_gid_extent {
	u32 first;
	u32 lower_first;
	u32 count;
};

struct uid_gid_map { /* 64 bytes -- 1 cache line */
	union {
		struct {
			struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS];
			u32 nr_extents;
		};
		struct {
			struct uid_gid_extent *forward;
			struct uid_gid_extent *reverse;
		};
	};
};

#define USERNS_SETGROUPS_ALLOWED 1UL

#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED

struct ucounts;

enum ucount_type {
	UCOUNT_USER_NAMESPACES,
	UCOUNT_PID_NAMESPACES,
	UCOUNT_UTS_NAMESPACES,
	UCOUNT_IPC_NAMESPACES,
	UCOUNT_NET_NAMESPACES,
	UCOUNT_MNT_NAMESPACES,
	UCOUNT_CGROUP_NAMESPACES,
	UCOUNT_TIME_NAMESPACES,
#ifdef CONFIG_INOTIFY_USER
	UCOUNT_INOTIFY_INSTANCES,
	UCOUNT_INOTIFY_WATCHES,
#endif
#ifdef CONFIG_FANOTIFY
	UCOUNT_FANOTIFY_GROUPS,
	UCOUNT_FANOTIFY_MARKS,
#endif
	UCOUNT_COUNTS,
};

enum rlimit_type {
	UCOUNT_RLIMIT_NPROC,
	UCOUNT_RLIMIT_MSGQUEUE,
	UCOUNT_RLIMIT_SIGPENDING,
	UCOUNT_RLIMIT_MEMLOCK,
	UCOUNT_RLIMIT_COUNTS,
};

#if IS_ENABLED(CONFIG_BINFMT_MISC)
struct binfmt_misc;
#endif

struct user_namespace {
	struct uid_gid_map	uid_map;
	struct uid_gid_map	gid_map;
	struct uid_gid_map	projid_map;
	struct user_namespace	*parent;
	int			level;
	kuid_t			owner;
	kgid_t			group;
	struct ns_common	ns;
	unsigned long		flags;
	/* parent_could_setfcap: true if the creator of this ns had CAP_SETFCAP
	 * in its effective capability set at the child ns creation time. */
	bool			parent_could_setfcap;

#ifdef CONFIG_KEYS
	/* List of joinable keyrings in this namespace.  Modification access of
	 * these pointers is controlled by keyring_sem.  Once
	 * user_keyring_register is set, it won't be changed, so it can be
	 * accessed directly with READ_ONCE().
*/ struct list_head keyring_name_list; struct key *user_keyring_register; struct rw_semaphore keyring_sem; #endif /* Register of per-UID persistent keyrings for this namespace */ #ifdef CONFIG_PERSISTENT_KEYRINGS struct key *persistent_keyring_register; #endif struct work_struct work; #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; #if IS_ENABLED(CONFIG_BINFMT_MISC) struct binfmt_misc *binfmt_misc; #endif } __randomize_layout; struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); } long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, bool override_rlimit); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); static inline long get_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type) { return READ_ONCE(ns->rlimit_max[type]); } static inline void set_userns_rlimit_max(struct user_namespace *ns, enum rlimit_type type, unsigned long max) { ns->rlimit_max[type] = max <= LONG_MAX ? 
max : LONG_MAX; } #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); return ns; } extern int create_user_ns(struct cred *new); extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); extern void __put_user_ns(struct user_namespace *ns); static inline void put_user_ns(struct user_namespace *ns) { if (ns && refcount_dec_and_test(&ns->ns.count)) __put_user_ns(ns); } struct seq_operations; extern const struct seq_operations proc_uid_seq_operations; extern const struct seq_operations proc_gid_seq_operations; extern const struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); extern bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; } static inline int create_user_ns(struct cred *new) { return -EINVAL; } static inline int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) { if (unshare_flags & CLONE_NEWUSER) return -EINVAL; return 0; } static inline void put_user_ns(struct user_namespace *ns) { } static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } static inline bool in_userns(const struct user_namespace *ancestor, const struct user_namespace *child) { return true; } static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; } static inline struct ns_common *ns_get_owner(struct ns_common *ns) { return ERR_PTR(-EPERM); } #endif #endif /* _LINUX_USER_H */ |
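/*
 * Illustration: a standalone sketch of how one uid_gid_extent translates
 * an id, mirroring what the kernel's map_id_down()-style lookups do over
 * uid_map/gid_map.  struct uid_gid_extent_example is a local stand-in for
 * the kernel's struct uid_gid_extent, and map_extent() is a hypothetical
 * helper name used only for this example.
 */
#include <stdio.h>
#include <stdint.h>

struct uid_gid_extent_example {
	uint32_t first;		/* first id inside the namespace */
	uint32_t lower_first;	/* corresponding id in the parent */
	uint32_t count;		/* number of ids covered */
};

/* Return the parent-namespace id, or UINT32_MAX if id is not mapped. */
static uint32_t map_extent(const struct uid_gid_extent_example *e, uint32_t id)
{
	if (id < e->first || id - e->first >= e->count)
		return UINT32_MAX;
	return e->lower_first + (id - e->first);
}

int main(void)
{
	/* Example extent: "0 100000 65536", as written to /proc/<pid>/uid_map. */
	struct uid_gid_extent_example e = {
		.first = 0, .lower_first = 100000, .count = 65536,
	};

	printf("ns uid 0     -> host uid %u\n", map_extent(&e, 0));
	printf("ns uid 1000  -> host uid %u\n", map_extent(&e, 1000));
	printf("ns uid 70000 -> %u (unmapped)\n", map_extent(&e, 70000));
	return 0;
}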
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
 */
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/percpu.h>

#include "bpf_lru_list.h"

#define LOCAL_FREE_TARGET		(128)
#define LOCAL_NR_SCANS			LOCAL_FREE_TARGET

#define PERCPU_FREE_TARGET		(4)
#define PERCPU_NR_SCANS			PERCPU_FREE_TARGET

/* Helpers to get the local list index */
#define LOCAL_LIST_IDX(t)	((t) - BPF_LOCAL_LIST_T_OFFSET)
#define LOCAL_FREE_LIST_IDX	LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
#define LOCAL_PENDING_LIST_IDX	LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
#define
IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET) static int get_next_cpu(int cpu) { cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = cpumask_first(cpu_possible_mask); return cpu; } /* Local list helpers */ static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) { return &loc_l->lists[LOCAL_FREE_LIST_IDX]; } static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) { return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; } /* bpf_lru_node helpers */ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) { return READ_ONCE(node->ref); } static void bpf_lru_node_clear_ref(struct bpf_lru_node *node) { WRITE_ONCE(node->ref, 0); } static void bpf_lru_list_count_inc(struct bpf_lru_list *l, enum bpf_lru_list_type type) { if (type < NR_BPF_LRU_LIST_COUNT) l->counts[type]++; } static void bpf_lru_list_count_dec(struct bpf_lru_list *l, enum bpf_lru_list_type type) { if (type < NR_BPF_LRU_LIST_COUNT) l->counts[type]--; } static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, struct bpf_lru_node *node, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; /* If the removing node is the next_inactive_rotation candidate, * move the next_inactive_rotation pointer also. */ if (&node->list == l->next_inactive_rotation) l->next_inactive_rotation = l->next_inactive_rotation->prev; bpf_lru_list_count_dec(l, node->type); node->type = tgt_free_type; list_move(&node->list, free_list); } /* Move nodes from local list to the LRU list */ static void __bpf_lru_node_move_in(struct bpf_lru_list *l, struct bpf_lru_node *node, enum bpf_lru_list_type tgt_type) { if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) return; bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; bpf_lru_node_clear_ref(node); list_move(&node->list, &l->lists[tgt_type]); } /* Move nodes between or within active and inactive list (like * active to inactive, inactive to active or tail of active back to * the head of active). */ static void __bpf_lru_node_move(struct bpf_lru_list *l, struct bpf_lru_node *node, enum bpf_lru_list_type tgt_type) { if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) return; if (node->type != tgt_type) { bpf_lru_list_count_dec(l, node->type); bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; } bpf_lru_node_clear_ref(node); /* If the moving node is the next_inactive_rotation candidate, * move the next_inactive_rotation pointer also. */ if (&node->list == l->next_inactive_rotation) l->next_inactive_rotation = l->next_inactive_rotation->prev; list_move(&node->list, &l->lists[tgt_type]); } static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) { return l->counts[BPF_LRU_LIST_T_INACTIVE] < l->counts[BPF_LRU_LIST_T_ACTIVE]; } /* Rotate the active list: * 1. Start from tail * 2. If the node has the ref bit set, it will be rotated * back to the head of active list with the ref bit cleared. * Give this node one more chance to survive in the active list. * 3. If the ref bit is not set, move it to the head of the * inactive list. * 4. 
It will at most scan nr_scans nodes */ static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, struct bpf_lru_list *l) { struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; struct bpf_lru_node *node, *tmp_node, *first_node; unsigned int i = 0; first_node = list_first_entry(active, struct bpf_lru_node, list); list_for_each_entry_safe_reverse(node, tmp_node, active, list) { if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); else __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); if (++i == lru->nr_scans || node == first_node) break; } } /* Rotate the inactive list. It starts from the next_inactive_rotation * 1. If the node has ref bit set, it will be moved to the head * of active list with the ref bit cleared. * 2. If the node does not have ref bit set, it will leave it * at its current location (i.e. do nothing) so that it can * be considered during the next inactive_shrink. * 3. It will at most scan nr_scans nodes */ static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, struct bpf_lru_list *l) { struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; struct list_head *cur, *last, *next = inactive; struct bpf_lru_node *node; unsigned int i = 0; if (list_empty(inactive)) return; last = l->next_inactive_rotation->next; if (last == inactive) last = last->next; cur = l->next_inactive_rotation; while (i < lru->nr_scans) { if (cur == inactive) { cur = cur->prev; continue; } node = list_entry(cur, struct bpf_lru_node, list); next = cur->prev; if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); if (cur == last) break; cur = next; i++; } l->next_inactive_rotation = next; } /* Shrink the inactive list. It starts from the tail of the * inactive list and only move the nodes without the ref bit * set to the designated free list. */ static unsigned int __bpf_lru_list_shrink_inactive(struct bpf_lru *lru, struct bpf_lru_list *l, unsigned int tgt_nshrink, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; struct bpf_lru_node *node, *tmp_node; unsigned int nshrinked = 0; unsigned int i = 0; list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { if (bpf_lru_node_is_ref(node)) { __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); } else if (lru->del_from_htab(lru->del_arg, node)) { __bpf_lru_node_move_to_free(l, node, free_list, tgt_free_type); if (++nshrinked == tgt_nshrink) break; } if (++i == lru->nr_scans) break; } return nshrinked; } /* 1. Rotate the active list (if needed) * 2. Always rotate the inactive list */ static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) { if (bpf_lru_list_inactive_low(l)) __bpf_lru_list_rotate_active(lru, l); __bpf_lru_list_rotate_inactive(lru, l); } /* Calls __bpf_lru_list_shrink_inactive() to shrink some * ref-bit-cleared nodes and move them to the designated * free list. * * If it cannot get a free node after calling * __bpf_lru_list_shrink_inactive(). It will just remove * one node from either inactive or active list without * honoring the ref-bit. It prefers inactive list to active * list in this situation. 
*/ static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, struct bpf_lru_list *l, unsigned int tgt_nshrink, struct list_head *free_list, enum bpf_lru_list_type tgt_free_type) { struct bpf_lru_node *node, *tmp_node; struct list_head *force_shrink_list; unsigned int nshrinked; nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, free_list, tgt_free_type); if (nshrinked) return nshrinked; /* Do a force shrink by ignoring the reference bit */ if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; else force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, list) { if (lru->del_from_htab(lru->del_arg, node)) { __bpf_lru_node_move_to_free(l, node, free_list, tgt_free_type); return 1; } } return 0; } /* Flush the nodes from the local pending list to the LRU list */ static void __local_list_flush(struct bpf_lru_list *l, struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node, *tmp_node; list_for_each_entry_safe_reverse(node, tmp_node, local_pending_list(loc_l), list) { if (bpf_lru_node_is_ref(node)) __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); else __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_INACTIVE); } } static void bpf_lru_list_push_free(struct bpf_lru_list *l, struct bpf_lru_node *node) { unsigned long flags; if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) return; raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); raw_spin_unlock_irqrestore(&l->lock, flags); } static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) { struct bpf_lru_list *l = &lru->common_lru.lru_list; struct bpf_lru_node *node, *tmp_node; unsigned int nfree = 0; raw_spin_lock(&l->lock); __local_list_flush(l, loc_l); __bpf_lru_list_rotate(lru, l); list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], list) { __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); if (++nfree == LOCAL_FREE_TARGET) break; } if (nfree < LOCAL_FREE_TARGET) __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, local_free_list(loc_l), BPF_LRU_LOCAL_LIST_T_FREE); raw_spin_unlock(&l->lock); } static void __local_list_add_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l, int cpu, struct bpf_lru_node *node, u32 hash) { *(u32 *)((void *)node + lru->hash_offset) = hash; node->cpu = cpu; node->type = BPF_LRU_LOCAL_LIST_T_PENDING; bpf_lru_node_clear_ref(node); list_add(&node->list, local_pending_list(loc_l)); } static struct bpf_lru_node * __local_list_pop_free(struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; node = list_first_entry_or_null(local_free_list(loc_l), struct bpf_lru_node, list); if (node) list_del(&node->list); return node; } static struct bpf_lru_node * __local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l) { struct bpf_lru_node *node; bool force = false; ignore_ref: /* Get from the tail (i.e. older element) of the pending list. 
*/ list_for_each_entry_reverse(node, local_pending_list(loc_l), list) { if ((!bpf_lru_node_is_ref(node) || force) && lru->del_from_htab(lru->del_arg, node)) { list_del(&node->list); return node; } } if (!force) { force = true; goto ignore_ref; } return NULL; } static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, u32 hash) { struct list_head *free_list; struct bpf_lru_node *node = NULL; struct bpf_lru_list *l; unsigned long flags; int cpu = raw_smp_processor_id(); l = per_cpu_ptr(lru->percpu_lru, cpu); raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_list_rotate(lru, l); free_list = &l->lists[BPF_LRU_LIST_T_FREE]; if (list_empty(free_list)) __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list, BPF_LRU_LIST_T_FREE); if (!list_empty(free_list)) { node = list_first_entry(free_list, struct bpf_lru_node, list); *(u32 *)((void *)node + lru->hash_offset) = hash; bpf_lru_node_clear_ref(node); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); } raw_spin_unlock_irqrestore(&l->lock, flags); return node; } static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru, u32 hash) { struct bpf_lru_locallist *loc_l, *steal_loc_l; struct bpf_common_lru *clru = &lru->common_lru; struct bpf_lru_node *node; int steal, first_steal; unsigned long flags; int cpu = raw_smp_processor_id(); loc_l = per_cpu_ptr(clru->local_list, cpu); raw_spin_lock_irqsave(&loc_l->lock, flags); node = __local_list_pop_free(loc_l); if (!node) { bpf_lru_list_pop_free_to_local(lru, loc_l); node = __local_list_pop_free(loc_l); } if (node) __local_list_add_pending(lru, loc_l, cpu, node, hash); raw_spin_unlock_irqrestore(&loc_l->lock, flags); if (node) return node; /* No free nodes found from the local free list and * the global LRU list. * * Steal from the local free/pending list of the * current CPU and remote CPU in RR. It starts * with the loc_l->next_steal CPU. 
*/ first_steal = loc_l->next_steal; steal = first_steal; do { steal_loc_l = per_cpu_ptr(clru->local_list, steal); raw_spin_lock_irqsave(&steal_loc_l->lock, flags); node = __local_list_pop_free(steal_loc_l); if (!node) node = __local_list_pop_pending(lru, steal_loc_l); raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags); steal = get_next_cpu(steal); } while (!node && steal != first_steal); loc_l->next_steal = steal; if (node) { raw_spin_lock_irqsave(&loc_l->lock, flags); __local_list_add_pending(lru, loc_l, cpu, node, hash); raw_spin_unlock_irqrestore(&loc_l->lock, flags); } return node; } struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) { if (lru->percpu) return bpf_percpu_lru_pop_free(lru, hash); else return bpf_common_lru_pop_free(lru, hash); } static void bpf_common_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { u8 node_type = READ_ONCE(node->type); unsigned long flags; if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) || WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE)) return; if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) { struct bpf_lru_locallist *loc_l; loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); raw_spin_lock_irqsave(&loc_l->lock, flags); if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) { raw_spin_unlock_irqrestore(&loc_l->lock, flags); goto check_lru_list; } node->type = BPF_LRU_LOCAL_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_move(&node->list, local_free_list(loc_l)); raw_spin_unlock_irqrestore(&loc_l->lock, flags); return; } check_lru_list: bpf_lru_list_push_free(&lru->common_lru.lru_list, node); } static void bpf_percpu_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { struct bpf_lru_list *l; unsigned long flags; l = per_cpu_ptr(lru->percpu_lru, node->cpu); raw_spin_lock_irqsave(&l->lock, flags); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); raw_spin_unlock_irqrestore(&l->lock, flags); } void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { if (lru->percpu) bpf_percpu_lru_push_free(lru, node); else bpf_common_lru_push_free(lru, node); } static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { struct bpf_lru_list *l = &lru->common_lru.lru_list; u32 i; for (i = 0; i < nr_elems; i++) { struct bpf_lru_node *node; node = (struct bpf_lru_node *)(buf + node_offset); node->type = BPF_LRU_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } } static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { u32 i, pcpu_entries; int cpu; struct bpf_lru_list *l; pcpu_entries = nr_elems / num_possible_cpus(); i = 0; for_each_possible_cpu(cpu) { struct bpf_lru_node *node; l = per_cpu_ptr(lru->percpu_lru, cpu); again: node = (struct bpf_lru_node *)(buf + node_offset); node->cpu = cpu; node->type = BPF_LRU_LIST_T_FREE; bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); i++; buf += elem_size; if (i == nr_elems) break; if (i % pcpu_entries) goto again; } } void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, u32 elem_size, u32 nr_elems) { if (lru->percpu) bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, nr_elems); else bpf_common_lru_populate(lru, buf, node_offset, elem_size, nr_elems); } static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu) { int i; for (i = 0; i < NR_BPF_LRU_LOCAL_LIST_T; i++) INIT_LIST_HEAD(&loc_l->lists[i]); 
loc_l->next_steal = cpu; raw_spin_lock_init(&loc_l->lock); } static void bpf_lru_list_init(struct bpf_lru_list *l) { int i; for (i = 0; i < NR_BPF_LRU_LIST_T; i++) INIT_LIST_HEAD(&l->lists[i]); for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++) l->counts[i] = 0; l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE]; raw_spin_lock_init(&l->lock); } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, del_from_htab_func del_from_htab, void *del_arg) { int cpu; if (percpu) { lru->percpu_lru = alloc_percpu(struct bpf_lru_list); if (!lru->percpu_lru) return -ENOMEM; for_each_possible_cpu(cpu) { struct bpf_lru_list *l; l = per_cpu_ptr(lru->percpu_lru, cpu); bpf_lru_list_init(l); } lru->nr_scans = PERCPU_NR_SCANS; } else { struct bpf_common_lru *clru = &lru->common_lru; clru->local_list = alloc_percpu(struct bpf_lru_locallist); if (!clru->local_list) return -ENOMEM; for_each_possible_cpu(cpu) { struct bpf_lru_locallist *loc_l; loc_l = per_cpu_ptr(clru->local_list, cpu); bpf_lru_locallist_init(loc_l, cpu); } bpf_lru_list_init(&clru->lru_list); lru->nr_scans = LOCAL_NR_SCANS; } lru->percpu = percpu; lru->del_from_htab = del_from_htab; lru->del_arg = del_arg; lru->hash_offset = hash_offset; return 0; } void bpf_lru_destroy(struct bpf_lru *lru) { if (lru->percpu) free_percpu(lru->percpu_lru); else free_percpu(lru->common_lru.local_list); } |
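/*
 * Illustration: the consumer view of this LRU.  BPF_MAP_TYPE_LRU_HASH maps
 * allocate elements through bpf_lru_pop_free() and evict through the
 * active/inactive rotation implemented above.  A minimal sketch using the
 * raw bpf(2) syscall; key/value sizes and max_entries are example values
 * and the program needs CAP_BPF or CAP_SYS_ADMIN.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

int main(void)
{
	union bpf_attr attr;
	__u32 key;
	int map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_LRU_HASH;	/* common (not per-CPU) LRU */
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 1024;		/* example size */

	map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0) {
		perror("BPF_MAP_CREATE");
		return 1;
	}

	/*
	 * Updating more than max_entries keys does not fail: the map evicts
	 * an element that the shrink logic above takes from the inactive
	 * list first (or force-shrinks if every node has its ref bit set).
	 */
	for (key = 0; key < 2048; key++) {
		__u64 val = key;

		memset(&attr, 0, sizeof(attr));
		attr.map_fd = map_fd;
		attr.key = (__u64)(unsigned long)&key;
		attr.value = (__u64)(unsigned long)&val;
		attr.flags = BPF_ANY;
		syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
	}

	close(map_fd);
	return 0;
}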
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_NETLINK_H
#define __LINUX_NETLINK_H

#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <net/scm.h>
#include <uapi/linux/netlink.h>

struct net;

void do_trace_netlink_extack(const char *msg);

static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
{
	return (struct nlmsghdr *)skb->data;
}

enum netlink_skb_flags {
	NETLINK_SKB_DST		= 0x8,	/* Dst set in sendto or sendmsg */
};

struct netlink_skb_parms {
	struct scm_creds	creds;		/* Skb credentials	*/
	__u32			portid;
	__u32			dst_group;
	__u32			flags;
	struct sock		*sk;
	bool			nsid_is_set;
	int			nsid;
};

#define NETLINK_CB(skb)		(*(struct netlink_skb_parms*)&((skb)->cb))
#define NETLINK_CREDS(skb)	(&NETLINK_CB((skb)).creds)

#define NETLINK_CTX_SIZE	48

void netlink_table_grab(void);
void netlink_table_ungrab(void);

#define NL_CFG_F_NONROOT_RECV	(1 << 0)
#define NL_CFG_F_NONROOT_SEND	(1 << 1)

/* optional Netlink kernel configuration parameters */
struct netlink_kernel_cfg {
	unsigned int	groups;
	unsigned int	flags;
	void		(*input)(struct sk_buff *skb);
	int		(*bind)(struct net *net, int group);
	void		(*unbind)(struct net *net, int group);
	void		(*release) (struct sock *sk, unsigned long *groups);
};

struct sock *__netlink_kernel_create(struct net *net, int unit,
				     struct module *module,
				     struct netlink_kernel_cfg *cfg);
static inline struct sock *
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
{
	return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
}

/* this can be increased when necessary - don't expose to userland */
#define NETLINK_MAX_COOKIE_LEN	20
#define NETLINK_MAX_FMTMSG_LEN	80

/**
 * struct netlink_ext_ack - netlink extended ACK report struct
 * @_msg: message string to report - don't access directly, use
 *	%NL_SET_ERR_MSG
 * @bad_attr: attribute with error
 * @policy: policy for a bad attribute
 * @miss_type: attribute type which was missing
 * @miss_nest: nest missing an attribute (%NULL if missing top level attr)
 * @cookie: cookie data to return to userspace (for success)
 * @cookie_len: actual cookie data length
 * @_msg_buf: output buffer for formatted message strings - don't access
 *	directly, use
%NL_SET_ERR_MSG_FMT */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; const struct nla_policy *policy; const struct nlattr *miss_nest; u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; char _msg_buf[NETLINK_MAX_FMTMSG_LEN]; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. * If string formatting is needed use NL_SET_ERR_MSG_FMT. */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ do_trace_netlink_extack(__msg); \ \ if (__extack) \ __extack->_msg = __msg; \ } while (0) /* We splice fmt with %s at each end even in the snprintf so that both calls * can use the same string constant, avoiding its duplication in .ro */ #define NL_SET_ERR_MSG_FMT(extack, fmt, args...) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (!__extack) \ break; \ if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ "%s" fmt "%s", "", ##args, "") >= \ NETLINK_MAX_FMTMSG_LEN) \ net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ ##args, "\n"); \ \ do_trace_netlink_extack(__extack->_msg_buf); \ \ __extack->_msg = __extack->_msg_buf; \ } while (0) #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) #define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) #define NL_SET_ERR_MSG_WEAK(extack, msg) do { \ if ((extack) && !(extack)->_msg) \ NL_SET_ERR_MSG((extack), msg); \ } while (0) #define NL_SET_ERR_MSG_WEAK_MOD(extack, msg) do { \ if ((extack) && !(extack)->_msg) \ NL_SET_ERR_MSG_MOD((extack), msg); \ } while (0) #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ (extack)->policy = (pol); \ } \ } while (0) #define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL) #define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \ static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ do_trace_netlink_extack(__msg); \ \ if (__extack) { \ __extack->_msg = __msg; \ __extack->bad_attr = (attr); \ __extack->policy = (pol); \ } \ } while (0) #define NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, pol, fmt, args...) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (!__extack) \ break; \ \ if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ "%s" fmt "%s", "", ##args, "") >= \ NETLINK_MAX_FMTMSG_LEN) \ net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ ##args, "\n"); \ \ do_trace_netlink_extack(__extack->_msg_buf); \ \ __extack->_msg = __extack->_msg_buf; \ __extack->bad_attr = (attr); \ __extack->policy = (pol); \ } while (0) #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) #define NL_SET_ERR_MSG_ATTR_FMT(extack, attr, msg, args...) 
\ NL_SET_ERR_MSG_ATTR_POL_FMT(extack, attr, NULL, msg, ##args) #define NL_SET_ERR_ATTR_MISS(extack, nest, type) do { \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) { \ __extack->miss_nest = (nest); \ __extack->miss_type = (type); \ } \ } while (0) #define NL_REQ_ATTR_CHECK(extack, nest, tb, type) ({ \ struct nlattr **__tb = (tb); \ u32 __attr = (type); \ int __retval; \ \ __retval = !__tb[__attr]; \ if (__retval) \ NL_SET_ERR_ATTR_MISS((extack), (nest), __attr); \ __retval; \ }) static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { if (!extack) return; memcpy(extack->cookie, &cookie, sizeof(cookie)); extack->cookie_len = sizeof(cookie); } void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack); int netlink_has_listeners(struct sock *sk, unsigned int group); bool netlink_strict_get_check(struct sk_buff *skb); int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); typedef int (*netlink_filter_fn)(struct sock *dsk, struct sk_buff *skb, void *data); int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation, netlink_filter_fn filter, void *filter_data); int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfd(int fd); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb); static inline struct sk_buff * netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *nskb; nskb = skb_clone(skb, gfp_mask); if (!nskb) return NULL; /* This is a large skb, set destructor callback to release head */ if (is_vmalloc_addr(skb->head)) nskb->destructor = skb->destructor; return nskb; } /* * skb should fit one page. This choice is good for headerless malloc. * But we should limit to 8K so that userspace does not have to * use enormous buffer sizes on recvmsg() calls just to avoid * MSG_TRUNC when PAGE_SIZE is very large. */ #if PAGE_SIZE < 8192UL #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE) #else #define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL) #endif #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); void *data; /* the module that dump function belong to */ struct module *module; struct netlink_ext_ack *extack; u16 family; u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; int flags; bool strict_check; union { u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. 
*/ long args[6]; }; }; #define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) struct netlink_notify { struct net *net; u32 portid; int protocol; }; struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); struct netlink_ext_ack *extack; void *data; struct module *module; u32 min_dump_alloc; int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control); static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, struct netlink_dump_control *control) { if (!control->module) control->module = THIS_MODULE; return __netlink_dump_start(ssk, skb, nlh, control); } struct netlink_tap { struct net_device *dev; struct module *module; struct list_head list; }; int netlink_add_tap(struct netlink_tap *nt); int netlink_remove_tap(struct netlink_tap *nt); bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, struct user_namespace *ns, int cap); bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool netlink_net_capable(const struct sk_buff *skb, int cap); struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */ |
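As a usage illustration, here is a hedged sketch of a request handler reporting errors through the extack helpers declared above. The example_req layout and handler name are hypothetical; nlmsg_data() and NLMSG_HDRLEN come from the usual netlink headers (net/netlink.h and uapi/linux/netlink.h).

struct example_req {
	__u32 version;		/* hypothetical request field */
};

static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct example_req *req;

	if (nlh->nlmsg_len < NLMSG_HDRLEN + sizeof(*req)) {
		/* fixed string: stored by pointer, no copy is made */
		NL_SET_ERR_MSG(extack, "example: request too short");
		return -EINVAL;
	}

	req = nlmsg_data(nlh);
	if (req->version != 1) {
		/* formatted string: rendered into extack->_msg_buf */
		NL_SET_ERR_MSG_FMT(extack, "example: unsupported version %u",
				   req->version);
		return -EOPNOTSUPP;
	}

	return 0;
}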
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM alarmtimer

#if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ALARMTIMER_H

#include <linux/alarmtimer.h>
#include <linux/rtc.h>
#include <linux/tracepoint.h>

TRACE_DEFINE_ENUM(ALARM_REALTIME);
TRACE_DEFINE_ENUM(ALARM_BOOTTIME);
TRACE_DEFINE_ENUM(ALARM_REALTIME_FREEZER);
TRACE_DEFINE_ENUM(ALARM_BOOTTIME_FREEZER);

#define show_alarm_type(type) __print_flags(type, " | ",	\
	{ 1 << ALARM_REALTIME, "REALTIME" },			\
	{ 1 << ALARM_BOOTTIME, "BOOTTIME" },			\
	{ 1 << ALARM_REALTIME_FREEZER, "REALTIME Freezer" },	\
	{ 1 << ALARM_BOOTTIME_FREEZER, "BOOTTIME Freezer" })

TRACE_EVENT(alarmtimer_suspend,

	TP_PROTO(ktime_t expires, int flag),

	TP_ARGS(expires, flag),

	TP_STRUCT__entry(
		__field(s64, expires)
		__field(unsigned char, alarm_type)
	),

	TP_fast_assign(
		__entry->expires = expires;
		__entry->alarm_type = flag;
	),

	TP_printk("alarmtimer type:%s expires:%llu",
		  show_alarm_type((1 << __entry->alarm_type)),
		  __entry->expires
	)
);

DECLARE_EVENT_CLASS(alarm_class,

	TP_PROTO(struct alarm *alarm, ktime_t now),

	TP_ARGS(alarm, now),

	TP_STRUCT__entry(
		__field(void *, alarm)
		__field(unsigned char, alarm_type)
		__field(s64, expires)
		__field(s64, now)
	),

	TP_fast_assign(
		__entry->alarm = alarm;
		__entry->alarm_type = alarm->type;
		__entry->expires = alarm->node.expires;
		__entry->now = now;
	),

	TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu",
		  __entry->alarm,
		  show_alarm_type((1 << __entry->alarm_type)),
		  __entry->expires,
		  __entry->now
	)
);

DEFINE_EVENT(alarm_class, alarmtimer_fired,
	TP_PROTO(struct alarm *alarm, ktime_t now),
	TP_ARGS(alarm, now)
);

DEFINE_EVENT(alarm_class, alarmtimer_start,
	TP_PROTO(struct alarm *alarm, ktime_t now),
	TP_ARGS(alarm, now)
);

DEFINE_EVENT(alarm_class, alarmtimer_cancel,
	TP_PROTO(struct alarm *alarm, ktime_t now),
	TP_ARGS(alarm, now)
);

#endif /* _TRACE_ALARMTIMER_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
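A short sketch of the consumer side, under the usual TRACE_EVENT conventions: exactly one translation unit defines CREATE_TRACE_POINTS before including this header (installed as trace/events/alarmtimer.h, matching TRACE_SYSTEM), and callers then invoke the generated trace_* functions. The helper below is hypothetical.

#include <linux/ktime.h>

#define CREATE_TRACE_POINTS
#include <trace/events/alarmtimer.h>

/* Hypothetical caller: emit the start event when arming an alarm. */
static void example_alarm_armed(struct alarm *alarm)
{
	trace_alarmtimer_start(alarm, ktime_get());
}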
// SPDX-License-Identifier: GPL-2.0
/*
 * USB Serial Converter Bus specific functions
 *
 * Copyright (C) 2002 Greg Kroah-Hartman (greg@kroah.com)
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/tty.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static int usb_serial_device_match(struct device *dev,
				   const struct device_driver *drv)
{
	const struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver = to_usb_serial_driver(drv);

	/*
	 * drivers are already assigned to ports in serial_probe so it's
	 * a simple check here.
	 */
	if (driver == port->serial->type)
		return 1;

	return 0;
}

static int usb_serial_device_probe(struct device *dev)
{
	struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver;
	struct device *tty_dev;
	int retval = 0;
	int minor;

	/* make sure suspend/resume doesn't race against port_probe */
	retval = usb_autopm_get_interface(port->serial->interface);
	if (retval)
		return retval;

	driver = port->serial->type;
	if (driver->port_probe) {
		retval = driver->port_probe(port);
		if (retval)
			goto err_autopm_put;
	}

	minor = port->minor;
	tty_dev = tty_port_register_device(&port->port, usb_serial_tty_driver,
					   minor, dev);
	if (IS_ERR(tty_dev)) {
		retval = PTR_ERR(tty_dev);
		goto err_port_remove;
	}

	usb_autopm_put_interface(port->serial->interface);

	dev_info(&port->serial->dev->dev,
		 "%s converter now attached to ttyUSB%d\n",
		 driver->description, minor);

	return 0;

err_port_remove:
	if (driver->port_remove)
		driver->port_remove(port);
err_autopm_put:
	usb_autopm_put_interface(port->serial->interface);

	return retval;
}

static void usb_serial_device_remove(struct device *dev)
{
	struct usb_serial_port *port = to_usb_serial_port(dev);
	struct usb_serial_driver *driver;
	int minor;
	int autopm_err;

	/*
	 * Make sure suspend/resume doesn't race against port_remove.
	 *
	 * Note that no further runtime PM callbacks will be made if
	 * autopm_get fails.
*/ autopm_err = usb_autopm_get_interface(port->serial->interface); minor = port->minor; tty_unregister_device(usb_serial_tty_driver, minor); driver = port->serial->type; if (driver->port_remove) driver->port_remove(port); dev_info(dev, "%s converter now disconnected from ttyUSB%d\n", driver->description, minor); if (!autopm_err) usb_autopm_put_interface(port->serial->interface); } static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); ssize_t retval = usb_store_new_id(&usb_drv->dynids, usb_drv->id_table, driver, buf, count); if (retval >= 0 && usb_drv->usb_driver != NULL) retval = usb_store_new_id(&usb_drv->usb_driver->dynids, usb_drv->usb_driver->id_table, &usb_drv->usb_driver->driver, buf, count); return retval; } static ssize_t new_id_show(struct device_driver *driver, char *buf) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); return usb_show_dynids(&usb_drv->dynids, buf); } static DRIVER_ATTR_RW(new_id); static struct attribute *usb_serial_drv_attrs[] = { &driver_attr_new_id.attr, NULL, }; ATTRIBUTE_GROUPS(usb_serial_drv); static void free_dynids(struct usb_serial_driver *drv) { struct usb_dynid *dynid, *n; guard(mutex)(&usb_dynids_lock); list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } } const struct bus_type usb_serial_bus_type = { .name = "usb-serial", .match = usb_serial_device_match, .probe = usb_serial_device_probe, .remove = usb_serial_device_remove, .drv_groups = usb_serial_drv_groups, }; int usb_serial_bus_register(struct usb_serial_driver *driver) { int retval; driver->driver.bus = &usb_serial_bus_type; INIT_LIST_HEAD(&driver->dynids.list); retval = driver_register(&driver->driver); return retval; } void usb_serial_bus_deregister(struct usb_serial_driver *driver) { free_dynids(driver); driver_unregister(&driver->driver); } |
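For context, here is a hedged sketch of the driver side that this bus code matches and probes. The VID/PID, names, and the port_probe body are placeholders; registration is assumed to go through the usual module_usb_serial_driver() helper from <linux/usb/serial.h>.

#include <linux/module.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>

static const struct usb_device_id example_id_table[] = {
	{ USB_DEVICE(0x1234, 0x5678) },		/* placeholder VID/PID */
	{ }
};
MODULE_DEVICE_TABLE(usb, example_id_table);

/* Called from usb_serial_device_probe() above, once per port. */
static int example_port_probe(struct usb_serial_port *port)
{
	/* allocate and stash per-port state here if needed */
	return 0;
}

static struct usb_serial_driver example_device = {
	.driver = {
		.name	= "example",
	},
	.id_table	= example_id_table,
	.num_ports	= 1,
	.port_probe	= example_port_probe,
};

static struct usb_serial_driver * const serial_drivers[] = {
	&example_device, NULL
};

module_usb_serial_driver(serial_drivers, example_id_table);
MODULE_LICENSE("GPL");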
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_error.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"

/*
 * XFS logging functions
 */
static void
__xfs_printk(
	const char		*level,
	const struct xfs_mount	*mp,
	struct va_format	*vaf)
{
	if (mp && mp->m_super) {
		printk("%sXFS (%s): %pV\n", level, mp->m_super->s_id, vaf);
		return;
	}
	printk("%sXFS: %pV\n", level, vaf);
}

void
xfs_printk_level(
	const char		*kern_level,
	const struct xfs_mount	*mp,
	const char		*fmt, ...)
{
	struct va_format	vaf;
	va_list			args;
	int			level;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	__xfs_printk(kern_level, mp, &vaf);

	va_end(args);

	if (!kstrtoint(kern_level, 0, &level) &&
	    level <= LOGLEVEL_ERR &&
	    xfs_error_level >= XFS_ERRLEVEL_HIGH)
		xfs_stack_trace();
}

void
_xfs_alert_tag(
	const struct xfs_mount	*mp,
	uint32_t		panic_tag,
	const char		*fmt, ...)
{
	struct va_format	vaf;
	va_list			args;
	int			do_panic = 0;

	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
		xfs_alert(mp, "Transforming an alert into a BUG.");
		do_panic = 1;
	}

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	__xfs_printk(KERN_ALERT, mp, &vaf);
	va_end(args);

	BUG_ON(do_panic);
}

void
asswarn(
	struct xfs_mount	*mp,
	char			*expr,
	char			*file,
	int			line)
{
	xfs_warn(mp, "Assertion failed: %s, file: %s, line: %d",
		expr, file, line);
	WARN_ON(1);
}

void
assfail(
	struct xfs_mount	*mp,
	char			*expr,
	char			*file,
	int			line)
{
	xfs_emerg(mp, "Assertion failed: %s, file: %s, line: %d",
		expr, file, line);
	if (xfs_globals.bug_on_assert)
		BUG();
	else
		WARN_ON(1);
}

void
xfs_hex_dump(const void *p, int length)
{
	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
}

void
xfs_buf_alert_ratelimited(
	struct xfs_buf		*bp,
	const char		*rlmsg,
	const char		*fmt, ...)
{ struct xfs_mount *mp = bp->b_mount; struct va_format vaf; va_list args; /* use the more aggressive per-target rate limit for buffers */ if (!___ratelimit(&bp->b_target->bt_ioerror_rl, rlmsg)) return; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; __xfs_printk(KERN_ALERT, mp, &vaf); va_end(args); } void xfs_warn_experimental( struct xfs_mount *mp, enum xfs_experimental_feat feat) { static const struct { const char *name; long opstate; } features[] = { [XFS_EXPERIMENTAL_PNFS] = { .opstate = XFS_OPSTATE_WARNED_PNFS, .name = "pNFS", }, [XFS_EXPERIMENTAL_SCRUB] = { .opstate = XFS_OPSTATE_WARNED_SCRUB, .name = "online scrub", }, [XFS_EXPERIMENTAL_SHRINK] = { .opstate = XFS_OPSTATE_WARNED_SHRINK, .name = "online shrink", }, [XFS_EXPERIMENTAL_LARP] = { .opstate = XFS_OPSTATE_WARNED_LARP, .name = "logged extended attributes", }, [XFS_EXPERIMENTAL_LBS] = { .opstate = XFS_OPSTATE_WARNED_LBS, .name = "large block size", }, [XFS_EXPERIMENTAL_EXCHRANGE] = { .opstate = XFS_OPSTATE_WARNED_EXCHRANGE, .name = "exchange range", }, [XFS_EXPERIMENTAL_PPTR] = { .opstate = XFS_OPSTATE_WARNED_PPTR, .name = "parent pointer", }, [XFS_EXPERIMENTAL_METADIR] = { .opstate = XFS_OPSTATE_WARNED_METADIR, .name = "metadata directory tree", }, }; ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); if (xfs_should_warn(mp, features[feat].opstate)) xfs_warn(mp, "EXPERIMENTAL %s feature enabled. Use at your own risk!", features[feat].name); } |
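As a usage note, here is a small hedged sketch of a typical call site. xfs_warn()/xfs_alert()/xfs_emerg() are macros from xfs_message.h that expand to xfs_printk_level() with the matching KERN_* level; the helper function below is hypothetical.

/* Hypothetical call site exercising the message helpers above. */
static void example_report(struct xfs_mount *mp, int error)
{
	if (error)
		xfs_warn(mp, "example operation failed, error %d", error);

	/* warn (once per mount) that an experimental feature is in use */
	xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
}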
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_blackhole.c	Black hole queue
 *
 * Authors:	Thomas Graf <tgraf@suug.ch>
 *
 * Note: Quantum tunneling is not supported.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			     struct sk_buff **to_free)
{
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}

static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
{
	return NULL;
}

static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
	.id		= "blackhole",
	.priv_size	= 0,
	.enqueue	= blackhole_enqueue,
	.dequeue	= blackhole_dequeue,
	.peek		= blackhole_dequeue,
	.owner		= THIS_MODULE,
};

static int __init blackhole_init(void)
{
	return register_qdisc(&blackhole_qdisc_ops);
}
device_initcall(blackhole_init)
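For reference, the qdisc is attached from userspace with iproute2: every packet enqueued to the device is then dropped in blackhole_enqueue() while the return value still reports success to the caller. The interface name below is only an example.

/*
 * Example usage (interface name is illustrative):
 *
 *   # tc qdisc add dev eth0 root blackhole
 *   # tc qdisc del dev eth0 root
 */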
/* Copyright 2011, Siemens AG
 * written by Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
 */

/* Based on patches from Jon Smirl <jonsmirl@gmail.com>
 * Copyright (c) 2011 Jon Smirl <jonsmirl@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

/* Jon's code is based on 6lowpan implementation for Contiki which is:
 * Copyright (c) 2008, Swedish Institute of Computer Science.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ieee802154.h> #include <linux/if_arp.h> #include <net/ipv6.h> #include "6lowpan_i.h" static int open_count; static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; static int lowpan_dev_init(struct net_device *ldev) { netdev_lockdep_set_classes(ldev); return 0; } static int lowpan_open(struct net_device *dev) { if (!open_count) lowpan_rx_init(); open_count++; return 0; } static int lowpan_stop(struct net_device *dev) { open_count--; if (!open_count) lowpan_rx_exit(); return 0; } static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); /* default no short_addr is available for a neighbour */ neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); return 0; } static int lowpan_get_iflink(const struct net_device *dev) { return READ_ONCE(lowpan_802154_dev(dev)->wdev->ifindex); } static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, .ndo_open = lowpan_open, .ndo_stop = lowpan_stop, .ndo_neigh_construct = lowpan_neigh_construct, .ndo_get_iflink = lowpan_get_iflink, }; static void lowpan_setup(struct net_device *ldev) { memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN); /* We need an ipv6hdr as minimum len when calling xmit */ ldev->hard_header_len = sizeof(struct ipv6hdr); ldev->flags = IFF_BROADCAST | IFF_MULTICAST; ldev->priv_flags |= IFF_NO_QUEUE; ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; ldev->needs_free_netdev = true; ldev->netns_local = true; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != IEEE802154_ADDR_LEN) return -EINVAL; } return 0; } static int lowpan_newlink(struct net *src_net, struct net_device *ldev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net_device *wdev; int ret; ASSERT_RTNL(); pr_debug("adding new link\n"); if (!tb[IFLA_LINK]) return -EINVAL; /* find and hold wpan device */ wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK])); if (!wdev) return -ENODEV; if (wdev->type != ARPHRD_IEEE802154) { dev_put(wdev); return -EINVAL; } if (wdev->ieee802154_ptr->lowpan_dev) { dev_put(wdev); return -EBUSY; } lowpan_802154_dev(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ __dev_addr_set(ldev, wdev->dev_addr, IEEE802154_ADDR_LEN); /* We need headroom for possible wpan_dev_hard_header call and tailroom * for encryption/fcs handling. The lowpan interface will replace * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6 * header. 
*/ ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN + wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; ldev->neigh_priv_len = sizeof(struct lowpan_802154_neigh); ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); return ret; } wdev->ieee802154_ptr->lowpan_dev = ldev; return 0; } static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { struct net_device *wdev = lowpan_802154_dev(ldev)->wdev; ASSERT_RTNL(); wdev->ieee802154_ptr->lowpan_dev = NULL; lowpan_unregister_netdevice(ldev); dev_put(wdev); } static struct rtnl_link_ops lowpan_link_ops __read_mostly = { .kind = "lowpan", .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_802154_dev)), .setup = lowpan_setup, .newlink = lowpan_newlink, .dellink = lowpan_dellink, .validate = lowpan_validate, }; static inline int __init lowpan_netlink_init(void) { return rtnl_link_register(&lowpan_link_ops); } static inline void lowpan_netlink_fini(void) { rtnl_link_unregister(&lowpan_link_ops); } static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct wpan_dev *wpan_dev; if (ndev->type != ARPHRD_IEEE802154) return NOTIFY_DONE; wpan_dev = ndev->ieee802154_ptr; if (!wpan_dev) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: /* Check if wpan interface is unregistered that we * also delete possible lowpan interfaces which belongs * to the wpan interface. */ if (wpan_dev->lowpan_dev) lowpan_dellink(wpan_dev->lowpan_dev, NULL); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static struct notifier_block lowpan_dev_notifier = { .notifier_call = lowpan_device_event, }; static int __init lowpan_init_module(void) { int err = 0; err = lowpan_net_frag_init(); if (err < 0) goto out; err = lowpan_netlink_init(); if (err < 0) goto out_frag; err = register_netdevice_notifier(&lowpan_dev_notifier); if (err < 0) goto out_pack; return 0; out_pack: lowpan_netlink_fini(); out_frag: lowpan_net_frag_exit(); out: return err; } static void __exit lowpan_cleanup_module(void) { lowpan_netlink_fini(); lowpan_net_frag_exit(); unregister_netdevice_notifier(&lowpan_dev_notifier); } module_init(lowpan_init_module); module_exit(lowpan_cleanup_module); MODULE_DESCRIPTION("IPv6 over Low power Wireless Personal Area Network IEEE 802.15.4 core"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("lowpan"); |
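For reference, a 6LoWPAN link is stacked on an IEEE 802.15.4 device from userspace, which exercises lowpan_newlink() above; deleting the link goes through lowpan_dellink(), and unregistering the underlying wpan device tears it down via the netdevice notifier. Interface names below are illustrative.

/*
 * Example usage (interface names are illustrative):
 *
 *   # ip link add link wpan0 name lowpan0 type lowpan
 *   # ip link set wpan0 up
 *   # ip link set lowpan0 up
 */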
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008-2014 Mathieu Desnoyers
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include
<linux/sched/task.h> #include <linux/static_key.h> enum tp_func_state { TP_FUNC_0, TP_FUNC_1, TP_FUNC_2, TP_FUNC_N, }; extern tracepoint_ptr_t __start___tracepoints_ptrs[]; extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; enum tp_transition_sync { TP_TRANSITION_SYNC_1_0_1, TP_TRANSITION_SYNC_N_2_1, _NR_TP_TRANSITION_SYNC, }; struct tp_transition_snapshot { unsigned long rcu; bool ongoing; }; /* Protected by tracepoints_mutex */ static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; static void tp_rcu_get_state(enum tp_transition_sync sync) { struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; /* Keep the latest get_state snapshot. */ snapshot->rcu = get_state_synchronize_rcu(); snapshot->ongoing = true; } static void tp_rcu_cond_sync(enum tp_transition_sync sync) { struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; if (!snapshot->ongoing) return; cond_synchronize_rcu(snapshot->rcu); snapshot->ongoing = false; } /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; #ifdef CONFIG_MODULES /* * Tracepoint module list mutex protects the local module list. */ static DEFINE_MUTEX(tracepoint_module_list_mutex); /* Local list of struct tp_module */ static LIST_HEAD(tracepoint_module_list); #endif /* CONFIG_MODULES */ /* * tracepoints_mutex protects the builtin and module tracepoints. * tracepoints_mutex nests inside tracepoint_module_list_mutex. */ static DEFINE_MUTEX(tracepoints_mutex); /* * Note about RCU : * It is used to delay the free of multiple probes array until a quiescent * state is reached. */ struct tp_probes { struct rcu_head rcu; struct tracepoint_func probes[]; }; /* Called in removal of a func but failed to allocate a new tp_funcs */ static void tp_stub_func(void) { return; } static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), GFP_KERNEL); return p == NULL ? NULL : p->probes; } static void rcu_free_old_probes(struct rcu_head *head) { kfree(container_of(head, struct tp_probes, rcu)); } static inline void release_probes(struct tracepoint *tp, struct tracepoint_func *old) { if (old) { struct tp_probes *tp_probes = container_of(old, struct tp_probes, probes[0]); if (tracepoint_is_faultable(tp)) call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes); else call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } static void debug_print_probes(struct tracepoint_func *funcs) { int i; if (!tracepoint_debug || !funcs) return; for (i = 0; funcs[i].func; i++) printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func); } static struct tracepoint_func * func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, int prio) { struct tracepoint_func *old, *new; int iter_probes; /* Iterate over old probe array. */ int nr_probes = 0; /* Counter for probes */ int pos = -1; /* Insertion position into new array */ if (WARN_ON(!tp_func->func)) return ERR_PTR(-EINVAL); debug_print_probes(*funcs); old = *funcs; if (old) { /* (N -> N+1), (N != 0, 1) probes */ for (iter_probes = 0; old[iter_probes].func; iter_probes++) { if (old[iter_probes].func == tp_stub_func) continue; /* Skip stub functions. 
*/ if (old[iter_probes].func == tp_func->func && old[iter_probes].data == tp_func->data) return ERR_PTR(-EEXIST); nr_probes++; } } /* + 2 : one for new probe, one for NULL func */ new = allocate_probes(nr_probes + 2); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { nr_probes = 0; for (iter_probes = 0; old[iter_probes].func; iter_probes++) { if (old[iter_probes].func == tp_stub_func) continue; /* Insert before probes of lower priority */ if (pos < 0 && old[iter_probes].prio < prio) pos = nr_probes++; new[nr_probes++] = old[iter_probes]; } if (pos < 0) pos = nr_probes++; /* nr_probes now points to the end of the new array */ } else { pos = 0; nr_probes = 1; /* must point at end of array */ } new[pos] = *tp_func; new[nr_probes].func = NULL; *funcs = new; debug_print_probes(*funcs); return old; } static void *func_remove(struct tracepoint_func **funcs, struct tracepoint_func *tp_func) { int nr_probes = 0, nr_del = 0, i; struct tracepoint_func *old, *new; old = *funcs; if (!old) return ERR_PTR(-ENOENT); debug_print_probes(*funcs); /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { if ((old[nr_probes].func == tp_func->func && old[nr_probes].data == tp_func->data) || old[nr_probes].func == tp_stub_func) nr_del++; } } /* * If probe is NULL, then nr_probes = nr_del = 0, and then the * entire entry will be removed. */ if (nr_probes - nr_del == 0) { /* N -> 0, (N > 1) */ *funcs = NULL; debug_print_probes(*funcs); return old; } else { int j = 0; /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); if (new) { for (i = 0; old[i].func; i++) { if ((old[i].func != tp_func->func || old[i].data != tp_func->data) && old[i].func != tp_stub_func) new[j++] = old[i]; } new[nr_probes - nr_del].func = NULL; *funcs = new; } else { /* * Failed to allocate, replace the old function * with calls to tp_stub_func. */ for (i = 0; old[i].func; i++) { if (old[i].func == tp_func->func && old[i].data == tp_func->data) WRITE_ONCE(old[i].func, tp_stub_func); } *funcs = old; } } debug_print_probes(*funcs); return old; } /* * Count the number of functions (enum tp_func_state) in a tp_funcs array. */ static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs) { if (!tp_funcs) return TP_FUNC_0; if (!tp_funcs[1].func) return TP_FUNC_1; if (!tp_funcs[2].func) return TP_FUNC_2; return TP_FUNC_N; /* 3 or more */ } static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) { void *func = tp->iterator; /* Synthetic events do not have static call sites */ if (!tp->static_call_key) return; if (nr_func_state(tp_funcs) == TP_FUNC_1) func = tp_funcs[0].func; __static_call_update(tp->static_call_key, tp->static_call_tramp, func); } /* * Add the probe function to a tracepoint. */ static int tracepoint_add_func(struct tracepoint *tp, struct tracepoint_func *func, int prio, bool warn) { struct tracepoint_func *old, *tp_funcs; int ret; if (tp->ext && tp->ext->regfunc && !static_key_enabled(&tp->key)) { ret = tp->ext->regfunc(); if (ret < 0) return ret; } tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_add(&tp_funcs, func, prio); if (IS_ERR(old)) { WARN_ON_ONCE(warn && PTR_ERR(old) != -ENOMEM); return PTR_ERR(old); } /* * rcu_assign_pointer has as smp_store_release() which makes sure * that the new probe callbacks array is consistent before setting * a pointer to it. 
This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ switch (nr_func_state(tp_funcs)) { case TP_FUNC_1: /* 0->1 */ /* * Make sure new static func never uses old data after a * 1->0->1 transition sequence. */ tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, tp_funcs); static_branch_enable(&tp->key); break; case TP_FUNC_2: /* 1->2 */ /* Set iterator static call */ tracepoint_update_call(tp, tp_funcs); /* * Iterator callback installed before updating tp->funcs. * Requires ordering between RCU assign/dereference and * static call update/call. */ fallthrough; case TP_FUNC_N: /* N->N+1 (N>1) */ rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>1) transition sequence. */ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); break; } release_probes(tp, old); return 0; } /* * Remove a probe function from a tracepoint. * Note: only waiting an RCU period after setting elem->call to the empty * function insures that the original callback is not used anymore. This insured * by preempt_disable around the call site. */ static int tracepoint_remove_func(struct tracepoint *tp, struct tracepoint_func *func) { struct tracepoint_func *old, *tp_funcs; tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); if (tp_funcs == old) /* Failed allocating new tp_funcs, replaced func with stub */ return 0; switch (nr_func_state(tp_funcs)) { case TP_FUNC_0: /* 1->0 */ /* Removed last function */ if (tp->ext && tp->ext->unregfunc && static_key_enabled(&tp->key)) tp->ext->unregfunc(); static_branch_disable(&tp->key); /* Set iterator static call */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, NULL); /* * Make sure new static func never uses old data after a * 1->0->1 transition sequence. */ tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); break; case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>2) transition sequence. If the first * element's data has changed, then force the synchronization * to prevent current readers that have loaded the old data * from calling the new function. */ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); break; case TP_FUNC_2: /* N->N-1 (N>2) */ fallthrough; case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); /* * Make sure static func never uses incorrect data after a * N->...->2->1 (N>2) transition sequence. */ if (tp_funcs[0].data != old[0].data) tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); break; } release_probes(tp, old); return 0; } /** * tracepoint_probe_register_prio_may_exist - Connect a probe to a tracepoint with priority * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * @prio: priority of this function over other registered functions * * Same as tracepoint_probe_register_prio() except that it will not warn * if the tracepoint is already registered. 
*/ int tracepoint_probe_register_prio_may_exist(struct tracepoint *tp, void *probe, void *data, int prio) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; tp_func.prio = prio; ret = tracepoint_add_func(tp, &tp_func, prio, false); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio_may_exist); /** * tracepoint_probe_register_prio - Connect a probe to a tracepoint with priority * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * @prio: priority of this function over other registered functions * * Returns 0 if ok, error value on error. * Note: if @tp is within a module, the caller is responsible for * unregistering the probe before the module is gone. This can be * performed either with a tracepoint module going notifier, or from * within module exit functions. */ int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, int prio) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; tp_func.prio = prio; ret = tracepoint_add_func(tp, &tp_func, prio, true); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio); /** * tracepoint_probe_register - Connect a probe to a tracepoint * @tp: tracepoint * @probe: probe handler * @data: tracepoint data * * Returns 0 if ok, error value on error. * Note: if @tp is within a module, the caller is responsible for * unregistering the probe before the module is gone. This can be * performed either with a tracepoint module going notifier, or from * within module exit functions. */ int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data) { return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO); } EXPORT_SYMBOL_GPL(tracepoint_probe_register); /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @tp: tracepoint * @probe: probe function pointer * @data: tracepoint data * * Returns 0 if ok, error value on error. */ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) { struct tracepoint_func tp_func; int ret; mutex_lock(&tracepoints_mutex); tp_func.func = probe; tp_func.data = data; ret = tracepoint_remove_func(tp, &tp_func); mutex_unlock(&tracepoints_mutex); return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); static void for_each_tracepoint_range( tracepoint_ptr_t *begin, tracepoint_ptr_t *end, void (*fct)(struct tracepoint *tp, void *priv), void *priv) { tracepoint_ptr_t *iter; if (!begin) return; for (iter = begin; iter < end; iter++) fct(tracepoint_ptr_deref(iter), priv); } #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) | (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) | (1 << TAINT_LIVEPATCH)); } static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list); /** * register_tracepoint_module_notifier - register tracepoint coming/going notifier * @nb: notifier block * * Notifiers registered with this function are called on module * coming/going with the tracepoint_module_list_mutex held. * The notifier block callback should expect a "struct tp_module" data * pointer. 
*/ int register_tracepoint_module_notifier(struct notifier_block *nb) { struct tp_module *tp_mod; int ret; mutex_lock(&tracepoint_module_list_mutex); ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb); if (ret) goto end; list_for_each_entry(tp_mod, &tracepoint_module_list, list) (void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod); end: mutex_unlock(&tracepoint_module_list_mutex); return ret; } EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier); /** * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier * @nb: notifier block * * The notifier block callback should expect a "struct tp_module" data * pointer. */ int unregister_tracepoint_module_notifier(struct notifier_block *nb) { struct tp_module *tp_mod; int ret; mutex_lock(&tracepoint_module_list_mutex); ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb); if (ret) goto end; list_for_each_entry(tp_mod, &tracepoint_module_list, list) (void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod); end: mutex_unlock(&tracepoint_module_list_mutex); return ret; } EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); /* * Ensure the tracer unregistered the module's probes before the module * teardown is performed. Prevents leaks of probe and data pointers. */ static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv) { WARN_ON_ONCE(tp->funcs); } static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod; if (!mod->num_tracepoints) return 0; /* * We skip modules that taint the kernel, especially those with different * module headers (for forced load), to make sure we don't cause a crash. * Staging, out-of-tree, unsigned GPL, and test modules are fine. */ if (trace_module_has_bad_taint(mod)) return 0; tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); if (!tp_mod) return -ENOMEM; tp_mod->mod = mod; mutex_lock(&tracepoint_module_list_mutex); list_add_tail(&tp_mod->list, &tracepoint_module_list); blocking_notifier_call_chain(&tracepoint_notify_list, MODULE_STATE_COMING, tp_mod); mutex_unlock(&tracepoint_module_list_mutex); return 0; } static void tracepoint_module_going(struct module *mod) { struct tp_module *tp_mod; if (!mod->num_tracepoints) return; mutex_lock(&tracepoint_module_list_mutex); list_for_each_entry(tp_mod, &tracepoint_module_list, list) { if (tp_mod->mod == mod) { blocking_notifier_call_chain(&tracepoint_notify_list, MODULE_STATE_GOING, tp_mod); list_del(&tp_mod->list); kfree(tp_mod); /* * Called the going notifier before checking for * quiescence. */ for_each_tracepoint_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints, tp_module_going_check_quiescent, NULL); break; } } /* * In the case of modules that were tainted at "coming", we'll simply * walk through the list without finding it. We cannot use the "tainted" * flag on "going", in case a module taints the kernel only after being * loaded. 
*/ mutex_unlock(&tracepoint_module_list_mutex); } static int tracepoint_module_notify(struct notifier_block *self, unsigned long val, void *data) { struct module *mod = data; int ret = 0; switch (val) { case MODULE_STATE_COMING: ret = tracepoint_module_coming(mod); break; case MODULE_STATE_LIVE: break; case MODULE_STATE_GOING: tracepoint_module_going(mod); break; case MODULE_STATE_UNFORMED: break; } return notifier_from_errno(ret); } static struct notifier_block tracepoint_module_nb = { .notifier_call = tracepoint_module_notify, .priority = 0, }; static __init int init_tracepoints(void) { int ret; ret = register_module_notifier(&tracepoint_module_nb); if (ret) pr_warn("Failed to register tracepoint module enter notifier\n"); return ret; } __initcall(init_tracepoints); /** * for_each_tracepoint_in_module - iteration on all tracepoints in a module * @mod: module * @fct: callback * @priv: private data */ void for_each_tracepoint_in_module(struct module *mod, void (*fct)(struct tracepoint *tp, struct module *mod, void *priv), void *priv) { tracepoint_ptr_t *begin, *end, *iter; lockdep_assert_held(&tracepoint_module_list_mutex); if (!mod) return; begin = mod->tracepoints_ptrs; end = mod->tracepoints_ptrs + mod->num_tracepoints; for (iter = begin; iter < end; iter++) fct(tracepoint_ptr_deref(iter), mod, priv); } /** * for_each_module_tracepoint - iteration on all tracepoints in all modules * @fct: callback * @priv: private data */ void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, struct module *mod, void *priv), void *priv) { struct tp_module *tp_mod; mutex_lock(&tracepoint_module_list_mutex); list_for_each_entry(tp_mod, &tracepoint_module_list, list) for_each_tracepoint_in_module(tp_mod->mod, fct, priv); mutex_unlock(&tracepoint_module_list_mutex); } #endif /* CONFIG_MODULES */ /** * for_each_kernel_tracepoint - iteration on all kernel tracepoints * @fct: callback * @priv: private data */ void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), void *priv) { for_each_tracepoint_range(__start___tracepoints_ptrs, __stop___tracepoints_ptrs, fct, priv); } EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ static int sys_tracepoint_refcount; int syscall_regfunc(void) { struct task_struct *p, *t; if (!sys_tracepoint_refcount) { read_lock(&tasklist_lock); for_each_process_thread(p, t) { set_task_syscall_work(t, SYSCALL_TRACEPOINT); } read_unlock(&tasklist_lock); } sys_tracepoint_refcount++; return 0; } void syscall_unregfunc(void) { struct task_struct *p, *t; sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { read_lock(&tasklist_lock); for_each_process_thread(p, t) { clear_task_syscall_work(t, SYSCALL_TRACEPOINT); } read_unlock(&tasklist_lock); } } #endif |
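To illustrate the registration API exported above, here is a hedged sketch of a module that looks up a tracepoint by name and attaches a probe. The tracepoint name "example_event" and its argument list are hypothetical; a real probe's parameters after the leading data cookie must match the tracepoint's TP_PROTO.

#include <linux/module.h>
#include <linux/string.h>
#include <linux/tracepoint.h>

static struct tracepoint *example_tp;

/* Probe: first argument is the data cookie passed at registration. */
static void example_probe(void *data, int value /* assumed TP_PROTO */)
{
	/* handle the event */
}

static void example_find_tp(struct tracepoint *tp, void *priv)
{
	if (!strcmp(tp->name, (const char *)priv))
		example_tp = tp;
}

static int __init example_init(void)
{
	for_each_kernel_tracepoint(example_find_tp, (void *)"example_event");
	if (!example_tp)
		return -ENODEV;

	return tracepoint_probe_register(example_tp, (void *)example_probe,
					 NULL);
}

static void __exit example_exit(void)
{
	if (example_tp)
		tracepoint_probe_unregister(example_tp, (void *)example_probe,
					    NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");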
// SPDX-License-Identifier: GPL-2.0
/*
 *
 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
 *
 * This code builds two trees of free clusters extents.
 * Trees are sorted by start of extent and by length of extent.
 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
 * In extreme case code reads on-disk bitmap to find free clusters.
* */ #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include "ntfs.h" #include "ntfs_fs.h" /* * Maximum number of extents in tree. */ #define NTFS_MAX_WND_EXTENTS (32u * 1024u) struct rb_node_key { struct rb_node node; size_t key; }; struct e_node { struct rb_node_key start; /* Tree sorted by start. */ struct rb_node_key count; /* Tree sorted by len. */ }; static int wnd_rescan(struct wnd_bitmap *wnd); static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw); static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits); static struct kmem_cache *ntfs_enode_cachep; int __init ntfs3_init_bitmap(void) { ntfs_enode_cachep = kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0, SLAB_RECLAIM_ACCOUNT, NULL); return ntfs_enode_cachep ? 0 : -ENOMEM; } void ntfs3_exit_bitmap(void) { kmem_cache_destroy(ntfs_enode_cachep); } /* * wnd_scan * * b_pos + b_len - biggest fragment. * Scan range [wpos wbits) window @buf. * * Return: -1 if not found. */ static size_t wnd_scan(const void *buf, size_t wbit, u32 wpos, u32 wend, size_t to_alloc, size_t *prev_tail, size_t *b_pos, size_t *b_len) { while (wpos < wend) { size_t free_len; u32 free_bits, end; u32 used = find_next_zero_bit_le(buf, wend, wpos); if (used >= wend) { if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; return -1; } if (used > wpos) { wpos = used; if (*b_len < *prev_tail) { *b_pos = wbit - *prev_tail; *b_len = *prev_tail; } *prev_tail = 0; } /* * Now we have a fragment [wpos, wend) staring with 0. */ end = wpos + to_alloc - *prev_tail; free_bits = find_next_bit_le(buf, min(end, wend), wpos); free_len = *prev_tail + free_bits - wpos; if (*b_len < free_len) { *b_pos = wbit + wpos - *prev_tail; *b_len = free_len; } if (free_len >= to_alloc) return wbit + wpos - *prev_tail; if (free_bits >= wend) { *prev_tail += free_bits - wpos; return -1; } wpos = free_bits + 1; *prev_tail = 0; } return -1; } /* * wnd_close - Frees all resources. */ void wnd_close(struct wnd_bitmap *wnd) { struct rb_node *node, *next; kvfree(wnd->free_bits); wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); while (node) { next = rb_next(node); rb_erase(node, &wnd->start_tree); kmem_cache_free(ntfs_enode_cachep, rb_entry(node, struct e_node, start.node)); node = next; } } static struct rb_node *rb_lookup(struct rb_root *root, size_t v) { struct rb_node **p = &root->rb_node; struct rb_node *r = NULL; while (*p) { struct rb_node_key *k; k = rb_entry(*p, struct rb_node_key, node); if (v < k->key) { p = &(*p)->rb_left; } else if (v > k->key) { r = &k->node; p = &(*p)->rb_right; } else { return &k->node; } } return r; } /* * rb_insert_count - Helper function to insert special kind of 'count' tree. */ static inline bool rb_insert_count(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_ckey = e->count.key; size_t e_skey = e->start.key; while (*p) { struct e_node *k = rb_entry(parent = *p, struct e_node, count.node); if (e_ckey > k->count.key) { p = &(*p)->rb_left; } else if (e_ckey < k->count.key) { p = &(*p)->rb_right; } else if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->count.node, parent, p); rb_insert_color(&e->count.node, root); return true; } /* * rb_insert_start - Helper function to insert special kind of 'count' tree. 
*/ static inline bool rb_insert_start(struct rb_root *root, struct e_node *e) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; size_t e_skey = e->start.key; while (*p) { struct e_node *k; parent = *p; k = rb_entry(parent, struct e_node, start.node); if (e_skey < k->start.key) { p = &(*p)->rb_left; } else if (e_skey > k->start.key) { p = &(*p)->rb_right; } else { WARN_ON(1); return false; } } rb_link_node(&e->start.node, parent, p); rb_insert_color(&e->start.node, root); return true; } /* * wnd_add_free_ext - Adds a new extent of free space. * @build: 1 when building tree. */ static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, bool build) { struct e_node *e, *e0 = NULL; size_t ib, end_in = bit + len; struct rb_node *n; if (build) { /* Use extent_min to filter too short extents. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS && len <= wnd->extent_min) { wnd->uptodated = -1; return; } } else { /* Try to find extent before 'bit'. */ n = rb_lookup(&wnd->start_tree, bit); if (!n) { n = rb_first(&wnd->start_tree); } else { e = rb_entry(n, struct e_node, start.node); n = rb_next(n); if (e->start.key + e->count.key == bit) { /* Remove left. */ bit = e->start.key; len += e->count.key; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; e0 = e; } } while (n) { size_t next_end; e = rb_entry(n, struct e_node, start.node); next_end = e->start.key + e->count.key; if (e->start.key > end_in) break; /* Remove right. */ n = rb_next(n); len += next_end - end_in; end_in = next_end; rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; if (!e0) e0 = e; else kmem_cache_free(ntfs_enode_cachep, e); } if (wnd->uptodated != 1) { /* Check bits before 'bit'. */ ib = wnd->zone_bit == wnd->zone_end || bit < wnd->zone_end ? 0 : wnd->zone_end; while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { bit -= 1; len += 1; } /* Check bits after 'end_in'. */ ib = wnd->zone_bit == wnd->zone_end || end_in > wnd->zone_bit ? wnd->nbits : wnd->zone_bit; while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { end_in += 1; len += 1; } } } /* Insert new fragment. */ if (wnd->count >= NTFS_MAX_WND_EXTENTS) { if (e0) kmem_cache_free(ntfs_enode_cachep, e0); wnd->uptodated = -1; /* Compare with smallest fragment. */ n = rb_last(&wnd->count_tree); e = rb_entry(n, struct e_node, count.node); if (len <= e->count.key) goto out; /* Do not insert small fragments. */ if (build) { struct e_node *e2; n = rb_prev(n); e2 = rb_entry(n, struct e_node, count.node); /* Smallest fragment will be 'e2->count.key'. */ wnd->extent_min = e2->count.key; } /* Replace smallest fragment by new one. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) { wnd->uptodated = -1; goto out; } if (build && len <= wnd->extent_min) wnd->extent_min = len; } e->start.key = bit; e->count.key = len; if (len > wnd->extent_max) wnd->extent_max = len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; out:; } /* * wnd_remove_free_ext - Remove a run from the cached free space. */ static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) { struct rb_node *n, *n3; struct e_node *e, *e3; size_t end_in = bit + len; size_t end3, end, new_key, new_len, max_new_len; /* Try to find extent before 'bit'. 
*/ n = rb_lookup(&wnd->start_tree, bit); if (!n) return; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; new_key = new_len = 0; len = e->count.key; /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */ if (e->start.key > bit) ; else if (end_in <= end) { /* Range [bit,end_in) inside 'e'. */ new_key = end_in; new_len = end - end_in; len = bit - e->start.key; } else if (bit > end) { bool bmax = false; n3 = rb_next(n); while (n3) { e3 = rb_entry(n3, struct e_node, start.node); if (e3->start.key >= end_in) break; if (e3->count.key == wnd->extent_max) bmax = true; end3 = e3->start.key + e3->count.key; if (end3 > end_in) { e3->start.key = end_in; rb_erase(&e3->count.node, &wnd->count_tree); e3->count.key = end3 - end_in; rb_insert_count(&wnd->count_tree, e3); break; } n3 = rb_next(n3); rb_erase(&e3->start.node, &wnd->start_tree); rb_erase(&e3->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e3); } if (!bmax) return; n3 = rb_first(&wnd->count_tree); wnd->extent_max = n3 ? rb_entry(n3, struct e_node, count.node)->count.key : 0; return; } if (e->count.key != wnd->extent_max) { ; } else if (rb_prev(&e->count.node)) { ; } else { n3 = rb_next(&e->count.node); max_new_len = max(len, new_len); if (!n3) { wnd->extent_max = max_new_len; } else { e3 = rb_entry(n3, struct e_node, count.node); wnd->extent_max = max(e3->count.key, max_new_len); } } if (!len) { if (new_len) { e->start.key = new_key; rb_erase(&e->count.node, &wnd->count_tree); e->count.key = new_len; rb_insert_count(&wnd->count_tree, e); } else { rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; kmem_cache_free(ntfs_enode_cachep, e); } goto out; } rb_erase(&e->count.node, &wnd->count_tree); e->count.key = len; rb_insert_count(&wnd->count_tree, e); if (!new_len) goto out; if (wnd->count >= NTFS_MAX_WND_EXTENTS) { wnd->uptodated = -1; /* Get minimal extent. */ e = rb_entry(rb_last(&wnd->count_tree), struct e_node, count.node); if (e->count.key > new_len) goto out; /* Replace minimum. */ rb_erase(&e->start.node, &wnd->start_tree); rb_erase(&e->count.node, &wnd->count_tree); wnd->count -= 1; } else { e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); if (!e) wnd->uptodated = -1; } if (e) { e->start.key = new_key; e->count.key = new_len; rb_insert_start(&wnd->start_tree, e); rb_insert_count(&wnd->count_tree, e); wnd->count += 1; } out: if (!wnd->count && 1 != wnd->uptodated) wnd_rescan(wnd); } /* * wnd_rescan - Scan all bitmap. Used while initialization. */ static int wnd_rescan(struct wnd_bitmap *wnd) { int err = 0; size_t prev_tail = 0; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u64 lbo, len = 0; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 wbits = 8 * sb->s_blocksize; u32 used, frb; size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; wnd->uptodated = 0; wnd->extent_max = 0; wnd->extent_min = MINUS_ONE_T; wnd->total_zeroes = 0; vbo = 0; for (iw = 0; iw < wnd->nwnd; iw++) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (wnd->inited) { if (!wnd->free_bits[iw]) { /* All ones. */ if (prev_tail) { wnd_add_free_ext(wnd, vbo * 8 - prev_tail, prev_tail, true); prev_tail = 0; } goto next_wnd; } if (wbits == wnd->free_bits[iw]) { /* All zeroes. 
*/ prev_tail += wbits; wnd->total_zeroes += wbits; goto next_wnd; } } if (!len) { u32 off = vbo & sbi->cluster_mask; if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits, &lcn, &clen, NULL)) { err = -ENOENT; goto out; } lbo = ((u64)lcn << cluster_bits) + off; len = ((u64)clen << cluster_bits) - off; } bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) { err = -EIO; goto out; } used = ntfs_bitmap_weight_le(bh->b_data, wbits); if (used < wbits) { frb = wbits - used; wnd->free_bits[iw] = frb; wnd->total_zeroes += frb; } wpos = 0; wbit = vbo * 8; if (wbit + wbits > wnd->nbits) wbits = wnd->nbits - wbit; do { used = find_next_zero_bit_le(bh->b_data, wbits, wpos); if (used > wpos && prev_tail) { wnd_add_free_ext(wnd, wbit + wpos - prev_tail, prev_tail, true); prev_tail = 0; } wpos = used; if (wpos >= wbits) { /* No free blocks. */ prev_tail = 0; break; } frb = find_next_bit_le(bh->b_data, wbits, wpos); if (frb >= wbits) { /* Keep last free block. */ prev_tail += frb - wpos; break; } wnd_add_free_ext(wnd, wbit + wpos - prev_tail, frb + prev_tail - wpos, true); /* Skip free block and first '1'. */ wpos = frb + 1; /* Reset previous tail. */ prev_tail = 0; } while (wpos < wbits); next_wnd: if (bh) put_bh(bh); bh = NULL; vbo += blocksize; if (len) { len -= blocksize; lbo += blocksize; } } /* Add last block. */ if (prev_tail) wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true); /* * Before init cycle wnd->uptodated was 0. * If any errors or limits occurs while initialization then * wnd->uptodated will be -1. * If 'uptodated' is still 0 then Tree is really updated. */ if (!wnd->uptodated) wnd->uptodated = 1; if (wnd->zone_bit != wnd->zone_end) { size_t zlen = wnd->zone_end - wnd->zone_bit; wnd->zone_end = wnd->zone_bit; wnd_zone_set(wnd, wnd->zone_bit, zlen); } out: return err; } int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) { int err; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; init_rwsem(&wnd->rw_lock); wnd->sb = sb; wnd->nbits = nbits; wnd->total_zeroes = nbits; wnd->extent_max = MINUS_ONE_T; wnd->zone_bit = wnd->zone_end = 0; wnd->nwnd = bytes_to_block(sb, ntfs3_bitmap_size(nbits)); wnd->bits_last = nbits & (wbits - 1); if (!wnd->bits_last) wnd->bits_last = wbits; wnd->free_bits = kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); if (!wnd->free_bits) return -ENOMEM; err = wnd_rescan(wnd); if (err) return err; wnd->inited = true; return 0; } /* * wnd_map - Call sb_bread for requested window. */ static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw) { size_t vbo; CLST lcn, clen; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi; struct buffer_head *bh; u64 lbo; sbi = sb->s_fs_info; vbo = (u64)iw << sb->s_blocksize_bits; if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen, NULL)) { return ERR_PTR(-ENOENT); } lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask); bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits); if (!bh) return ERR_PTR(-EIO); return bh; } /* * wnd_set_free - Mark the bits range from bit to bit + bits as free. 
*/ int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; u32 wbits = 8 * sb->s_blocksize; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, wbit, op); wnd->free_bits[iw] += op; wnd->total_zeroes += op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); wnd_add_free_ext(wnd, bit, op, false); } return err; } /* * wnd_set_used - Mark the bits range from bit to bit + bits as used. */ int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { int err = 0; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); struct buffer_head *bh; u32 op; for (; iw < wnd->nwnd && bits; iw++, bit += op, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } lock_buffer(bh); ntfs_bitmap_set_le(bh->b_data, wbit, op); wnd->free_bits[iw] -= op; wnd->total_zeroes -= op; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); put_bh(bh); if (!RB_EMPTY_ROOT(&wnd->start_tree)) wnd_remove_free_ext(wnd, bit, op); } return err; } /* * wnd_set_used_safe - Mark the bits range from bit to bit + bits as used. * * Unlikely wnd_set_used/wnd_set_free this function is not full trusted. * It scans every bit in bitmap and marks free bit as used. * @done - how many bits were marked as used. * * NOTE: normally *done should be 0. */ int wnd_set_used_safe(struct wnd_bitmap *wnd, size_t bit, size_t bits, size_t *done) { size_t i, from = 0, len = 0; int err = 0; *done = 0; for (i = 0; i < bits; i++) { if (wnd_is_free(wnd, bit + i, 1)) { if (!len) from = bit + i; len += 1; } else if (len) { err = wnd_set_used(wnd, from, len); *done += len; len = 0; if (err) break; } } if (len) { /* last fragment. */ err = wnd_set_used(wnd, from, len); *done += len; } return err; } /* * wnd_is_free_hlp * * Return: True if all clusters [bit, bit+bits) are free (bitmap only). */ static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) { struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wbits != wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) return false; ret = are_bits_clear(bh->b_data, wbit, op); put_bh(bh); if (!ret) return false; } } return true; } /* * wnd_is_free * * Return: True if all clusters [bit, bit+bits) are free. 
*/ bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret; struct rb_node *n; size_t end; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; n = rb_lookup(&wnd->start_tree, bit); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); end = e->start.key + e->count.key; if (bit < end && bit + bits <= end) return true; use_wnd: ret = wnd_is_free_hlp(wnd, bit, bits); return ret; } /* * wnd_is_used * * Return: True if all clusters [bit, bit+bits) are used. */ bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) { bool ret = false; struct super_block *sb = wnd->sb; size_t iw = bit >> (sb->s_blocksize_bits + 3); u32 wbits = 8 * sb->s_blocksize; u32 wbit = bit & (wbits - 1); u32 op; size_t end; struct rb_node *n; struct e_node *e; if (RB_EMPTY_ROOT(&wnd->start_tree)) goto use_wnd; end = bit + bits; n = rb_lookup(&wnd->start_tree, end - 1); if (!n) goto use_wnd; e = rb_entry(n, struct e_node, start.node); if (e->start.key + e->count.key > bit) return false; use_wnd: for (; iw < wnd->nwnd && bits; iw++, bits -= op, wbit = 0) { if (unlikely(iw + 1 == wnd->nwnd)) wbits = wnd->bits_last; op = min_t(u32, wbits - wbit, bits); if (wnd->free_bits[iw]) { bool ret; struct buffer_head *bh = wnd_map(wnd, iw); if (IS_ERR(bh)) goto out; ret = are_bits_set(bh->b_data, wbit, op); put_bh(bh); if (!ret) goto out; } } ret = true; out: return ret; } /* * wnd_find - Look for free space. * * - flags - BITMAP_FIND_XXX flags * * Return: 0 if not found. */ size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, size_t flags, size_t *allocated) { struct super_block *sb; u32 wbits, wpos, wzbit, wzend; size_t fnd, max_alloc, b_len, b_pos; size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; size_t to_alloc0 = to_alloc; const struct e_node *e; const struct rb_node *pr, *cr; u8 log2_bits; bool fbits_valid; struct buffer_head *bh; /* Fast checking for available free space. */ if (flags & BITMAP_FIND_FULL) { size_t zeroes = wnd_zeroes(wnd); zeroes -= wnd->zone_end - wnd->zone_bit; if (zeroes < to_alloc0) goto no_space; if (to_alloc0 > wnd->extent_max) goto no_space; } else { if (to_alloc > wnd->extent_max) to_alloc = wnd->extent_max; } if (wnd->zone_bit <= hint && hint < wnd->zone_end) hint = wnd->zone_end; max_alloc = wnd->nbits; b_len = b_pos = 0; if (hint >= max_alloc) hint = 0; if (RB_EMPTY_ROOT(&wnd->start_tree)) { if (wnd->uptodated == 1) { /* Extents tree is updated -> No free space. */ goto no_space; } goto scan_bitmap; } e = NULL; if (!hint) goto allocate_biggest; /* Use hint: Enumerate extents by start >= hint. */ pr = NULL; cr = wnd->start_tree.rb_node; for (;;) { e = rb_entry(cr, struct e_node, start.node); if (e->start.key == hint) break; if (e->start.key < hint) { pr = cr; cr = cr->rb_right; if (!cr) break; continue; } cr = cr->rb_left; if (!cr) { e = pr ? rb_entry(pr, struct e_node, start.node) : NULL; break; } } if (!e) goto allocate_biggest; if (e->start.key + e->count.key > hint) { /* We have found extension with 'hint' inside. */ size_t len = e->start.key + e->count.key - hint; if (len >= to_alloc && hint + to_alloc <= max_alloc) { fnd = hint; goto found; } if (!(flags & BITMAP_FIND_FULL)) { if (len > to_alloc) len = to_alloc; if (hint + len <= max_alloc) { fnd = hint; to_alloc = len; goto found; } } } allocate_biggest: /* Allocate from biggest free extent. 
*/ e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node); if (e->count.key != wnd->extent_max) wnd->extent_max = e->count.key; if (e->count.key < max_alloc) { if (e->count.key >= to_alloc) { ; } else if (flags & BITMAP_FIND_FULL) { if (e->count.key < to_alloc0) { /* Biggest free block is less then requested. */ goto no_space; } to_alloc = e->count.key; } else if (-1 != wnd->uptodated) { to_alloc = e->count.key; } else { /* Check if we can use more bits. */ size_t op, max_check; struct rb_root start_tree; memcpy(&start_tree, &wnd->start_tree, sizeof(struct rb_root)); memset(&wnd->start_tree, 0, sizeof(struct rb_root)); max_check = e->start.key + to_alloc; if (max_check > max_alloc) max_check = max_alloc; for (op = e->start.key + e->count.key; op < max_check; op++) { if (!wnd_is_free(wnd, op, 1)) break; } memcpy(&wnd->start_tree, &start_tree, sizeof(struct rb_root)); to_alloc = op - e->start.key; } /* Prepare to return. */ fnd = e->start.key; if (e->start.key + to_alloc > max_alloc) to_alloc = max_alloc - e->start.key; goto found; } if (wnd->uptodated == 1) { /* Extents tree is updated -> no free space. */ goto no_space; } b_len = e->count.key; b_pos = e->start.key; scan_bitmap: sb = wnd->sb; log2_bits = sb->s_blocksize_bits + 3; /* At most two ranges [hint, max_alloc) + [0, hint). */ Again: /* TODO: Optimize request for case nbits > wbits. */ iw = hint >> log2_bits; wbits = sb->s_blocksize * 8; wpos = hint & (wbits - 1); prev_tail = 0; fbits_valid = true; if (max_alloc == wnd->nbits) { nwnd = wnd->nwnd; } else { size_t t = max_alloc + wbits - 1; nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd; } /* Enumerate all windows. */ for (; iw < nwnd; iw++) { wbit = iw << log2_bits; if (!wnd->free_bits[iw]) { if (prev_tail > b_len) { b_pos = wbit - prev_tail; b_len = prev_tail; } /* Skip full used window. */ prev_tail = 0; wpos = 0; continue; } if (unlikely(iw + 1 == nwnd)) { if (max_alloc == wnd->nbits) { wbits = wnd->bits_last; } else { size_t t = max_alloc & (wbits - 1); if (t) { wbits = t; fbits_valid = false; } } } if (wnd->zone_end > wnd->zone_bit) { ebit = wbit + wbits; zbit = max(wnd->zone_bit, wbit); zend = min(wnd->zone_end, ebit); /* Here we have a window [wbit, ebit) and zone [zbit, zend). */ if (zend <= zbit) { /* Zone does not overlap window. */ } else { wzbit = zbit - wbit; wzend = zend - wbit; /* Zone overlaps window. */ if (wnd->free_bits[iw] == wzend - wzbit) { prev_tail = 0; wpos = 0; continue; } /* Scan two ranges window: [wbit, zbit) and [zend, ebit). */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { /* TODO: Error */ prev_tail = 0; wpos = 0; continue; } /* Scan range [wbit, zbit). */ if (wpos < wzbit) { /* Scan range [wpos, zbit). */ fnd = wnd_scan(bh->b_data, wbit, wpos, wzbit, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } prev_tail = 0; /* Scan range [zend, ebit). */ if (wzend < wbits) { fnd = wnd_scan(bh->b_data, wbit, max(wzend, wpos), wbits, to_alloc, &prev_tail, &b_pos, &b_len); if (fnd != MINUS_ONE_T) { put_bh(bh); goto found; } } wpos = 0; put_bh(bh); continue; } } /* Current window does not overlap zone. */ if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) { /* Window is empty. */ if (prev_tail + wbits >= to_alloc) { fnd = wbit + wpos - prev_tail; goto found; } /* Increase 'prev_tail' and process next window. */ prev_tail += wbits; wpos = 0; continue; } /* Read window. */ bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { // TODO: Error. prev_tail = 0; wpos = 0; continue; } /* Scan range [wpos, eBits). 
*/ fnd = wnd_scan(bh->b_data, wbit, wpos, wbits, to_alloc, &prev_tail, &b_pos, &b_len); put_bh(bh); if (fnd != MINUS_ONE_T) goto found; } if (b_len < prev_tail) { /* The last fragment. */ b_len = prev_tail; b_pos = max_alloc - prev_tail; } if (hint) { /* * We have scanned range [hint max_alloc). * Prepare to scan range [0 hint + to_alloc). */ size_t nextmax = hint + to_alloc; if (likely(nextmax >= hint) && nextmax < max_alloc) max_alloc = nextmax; hint = 0; goto Again; } if (!b_len) goto no_space; wnd->extent_max = b_len; if (flags & BITMAP_FIND_FULL) goto no_space; fnd = b_pos; to_alloc = b_len; found: if (flags & BITMAP_FIND_MARK_AS_USED) { /* TODO: Optimize remove extent (pass 'e'?). */ if (wnd_set_used(wnd, fnd, to_alloc)) goto no_space; } else if (wnd->extent_max != MINUS_ONE_T && to_alloc > wnd->extent_max) { wnd->extent_max = to_alloc; } *allocated = fnd; return to_alloc; no_space: return 0; } /* * wnd_extend - Extend bitmap ($MFT bitmap). */ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) { int err; struct super_block *sb = wnd->sb; struct ntfs_sb_info *sbi = sb->s_fs_info; u32 blocksize = sb->s_blocksize; u32 wbits = blocksize * 8; u32 b0, new_last; size_t bits, iw, new_wnd; size_t old_bits = wnd->nbits; u16 *new_free; if (new_bits <= old_bits) return -EINVAL; /* Align to 8 byte boundary. */ new_wnd = bytes_to_block(sb, ntfs3_bitmap_size(new_bits)); new_last = new_bits & (wbits - 1); if (!new_last) new_last = wbits; if (new_wnd != wnd->nwnd) { new_free = kmalloc_array(new_wnd, sizeof(u16), GFP_NOFS); if (!new_free) return -ENOMEM; memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); kvfree(wnd->free_bits); wnd->free_bits = new_free; } /* Zero bits [old_bits,new_bits). */ bits = new_bits - old_bits; b0 = old_bits & (wbits - 1); for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) { u32 op; size_t frb; u64 vbo, lbo, bytes; struct buffer_head *bh; if (iw + 1 == new_wnd) wbits = new_last; op = b0 + bits > wbits ? 
wbits - b0 : bits; vbo = (u64)iw * blocksize; err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); if (err) return err; bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) return -EIO; lock_buffer(bh); ntfs_bitmap_clear_le(bh->b_data, b0, blocksize * 8 - b0); frb = wbits - ntfs_bitmap_weight_le(bh->b_data, wbits); wnd->total_zeroes += frb - wnd->free_bits[iw]; wnd->free_bits[iw] = frb; set_buffer_uptodate(bh); mark_buffer_dirty(bh); unlock_buffer(bh); /* err = sync_dirty_buffer(bh); */ b0 = 0; bits -= op; } wnd->nbits = new_bits; wnd->nwnd = new_wnd; wnd->bits_last = new_last; wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false); return 0; } void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) { size_t zlen = wnd->zone_end - wnd->zone_bit; if (zlen) wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); if (!RB_EMPTY_ROOT(&wnd->start_tree) && len) wnd_remove_free_ext(wnd, lcn, len); wnd->zone_bit = lcn; wnd->zone_end = lcn + len; } int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) { int err = 0; struct super_block *sb = sbi->sb; struct wnd_bitmap *wnd = &sbi->used.bitmap; u32 wbits = 8 * sb->s_blocksize; CLST len = 0, lcn = 0, done = 0; CLST minlen = bytes_to_cluster(sbi, range->minlen); CLST lcn_from = bytes_to_cluster(sbi, range->start); size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); u32 wbit = lcn_from & (wbits - 1); CLST lcn_to; if (!minlen) minlen = 1; if (range->len == (u64)-1) lcn_to = wnd->nbits; else lcn_to = bytes_to_cluster(sbi, range->start + range->len); down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); for (; iw < wnd->nwnd; iw++, wbit = 0) { CLST lcn_wnd = iw * wbits; struct buffer_head *bh; if (lcn_wnd > lcn_to) break; if (!wnd->free_bits[iw]) continue; if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; if (lcn_wnd + wbits > lcn_to) wbits = lcn_to - lcn_wnd; bh = wnd_map(wnd, iw); if (IS_ERR(bh)) { err = PTR_ERR(bh); break; } for (; wbit < wbits; wbit++) { if (!test_bit_le(wbit, bh->b_data)) { if (!len) lcn = lcn_wnd + wbit; len += 1; continue; } if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } len = 0; } put_bh(bh); } /* Process the last fragment. 
*/ if (len >= minlen) { err = ntfs_discard(sbi, lcn, len); if (err) goto out; done += len; } out: range->len = (u64)done << sbi->cluster_bits; up_read(&wnd->rw_lock); return err; } #if BITS_PER_LONG == 64 typedef __le64 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le64(x) #define ul_to_cpu(x) le64_to_cpu(x) #else typedef __le32 bitmap_ulong; #define cpu_to_ul(x) cpu_to_le32(x) #define ul_to_cpu(x) le32_to_cpu(x) #endif void ntfs_bitmap_set_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_set = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_set >= 0) { *p |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_LONG; mask_to_set = cpu_to_ul(~0UL); p++; } if (len) { mask_to_set &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p |= mask_to_set; } } void ntfs_bitmap_clear_le(void *map, unsigned int start, int len) { bitmap_ulong *p = (bitmap_ulong *)map + BIT_WORD(start); const unsigned int size = start + len; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); bitmap_ulong mask_to_clear = cpu_to_ul(BITMAP_FIRST_WORD_MASK(start)); while (len - bits_to_clear >= 0) { *p &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_LONG; mask_to_clear = cpu_to_ul(~0UL); p++; } if (len) { mask_to_clear &= cpu_to_ul(BITMAP_LAST_WORD_MASK(size)); *p &= ~mask_to_clear; } } unsigned int ntfs_bitmap_weight_le(const void *bitmap, int bits) { const ulong *bmp = bitmap; unsigned int k, lim = bits / BITS_PER_LONG; unsigned int w = 0; for (k = 0; k < lim; k++) w += hweight_long(bmp[k]); if (bits % BITS_PER_LONG) { w += hweight_long(ul_to_cpu(((bitmap_ulong *)bitmap)[k]) & BITMAP_LAST_WORD_MASK(bits)); } return w; } |
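Aside on the little-endian bitmap helpers above: ntfs_bitmap_set_le() and ntfs_bitmap_clear_le() build a mask for the partial first word, walk whole words, then mask the partial last word. A minimal, self-contained user-space sketch of the same range-masking idea is shown below; it works byte-wise (so it is endian-neutral) and the helper names are invented for illustration, not part of the kernel sources.

/* Illustrative only: byte-wise analogue of the first/last-word masking
 * used by ntfs_bitmap_set_le(). Not kernel code.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void bitmap_set_range(uint8_t *map, unsigned int start, unsigned int len)
{
        while (len) {
                unsigned int bit = start & 7;
                unsigned int n = 8 - bit;       /* bits left in this byte */

                if (n > len)
                        n = len;
                /* mask of 'n' ones starting at 'bit' */
                map[start >> 3] |= (uint8_t)(((1u << n) - 1) << bit);
                start += n;
                len -= n;
        }
}

static unsigned int bitmap_weight(const uint8_t *map, unsigned int bits)
{
        unsigned int i, w = 0;

        for (i = 0; i < bits; i++)
                if (map[i >> 3] & (1u << (i & 7)))
                        w++;
        return w;
}

int main(void)
{
        uint8_t map[8];

        memset(map, 0, sizeof(map));
        bitmap_set_range(map, 5, 13);           /* mark bits 5..17 as used */
        printf("weight = %u\n", bitmap_weight(map, 64));        /* prints 13 */
        return 0;
}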
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ALSA timer back-end using hrtimer
 * Copyright (C) 2008 Takashi Iwai
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/hrtimer.h>
#include <sound/core.h>
#include <sound/timer.h>

MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
MODULE_DESCRIPTION("ALSA hrtimer backend");
MODULE_LICENSE("GPL");
MODULE_ALIAS("snd-timer-" __stringify(SNDRV_TIMER_GLOBAL_HRTIMER));

#define NANO_SEC 1000000000UL   /* 10^9 in sec */
static unsigned int resolution;

struct snd_hrtimer {
        struct snd_timer *timer;
        struct hrtimer hrt;
        bool in_callback;
};

static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt)
{
        struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt);
        struct snd_timer *t = stime->timer;
        ktime_t delta;
        unsigned long ticks;
        enum hrtimer_restart ret = HRTIMER_NORESTART;

        scoped_guard(spinlock, &t->lock) {
                if (!t->running)
                        return HRTIMER_NORESTART; /* fast path */
                stime->in_callback = true;
                ticks = t->sticks;
        }

        /* calculate the drift */
        delta = ktime_sub(hrt->base->get_time(), hrtimer_get_expires(hrt));
        if (delta > 0)
                ticks += ktime_divns(delta, ticks * resolution);

        snd_timer_interrupt(stime->timer, ticks);

        guard(spinlock)(&t->lock);
        if (t->running) {
                hrtimer_add_expires_ns(hrt, t->sticks * resolution);
                ret = HRTIMER_RESTART;
        }

        stime->in_callback = false;
        return ret;
}

static int snd_hrtimer_open(struct snd_timer *t)
{
        struct snd_hrtimer *stime;

        stime = kzalloc(sizeof(*stime), GFP_KERNEL);
        if (!stime)
                return -ENOMEM;
        stime->timer = t;
        hrtimer_setup(&stime->hrt, snd_hrtimer_callback, CLOCK_MONOTONIC,
                      HRTIMER_MODE_REL);
        t->private_data = stime;
        return 0;
}

static int snd_hrtimer_close(struct snd_timer *t)
{
        struct snd_hrtimer *stime = t->private_data;

        if (stime) {
                scoped_guard(spinlock_irq, &t->lock) {
                        t->running = 0; /* just to be sure */
                        stime->in_callback = 1; /* skip start/stop */
                }

                hrtimer_cancel(&stime->hrt);
                kfree(stime);
                t->private_data = NULL;
        }
        return 0;
}

static int snd_hrtimer_start(struct snd_timer *t)
{
        struct snd_hrtimer *stime = t->private_data;

        if (stime->in_callback)
                return 0;
        hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
                      HRTIMER_MODE_REL);
        return 0;
}

static int snd_hrtimer_stop(struct snd_timer *t)
{
        struct snd_hrtimer *stime = t->private_data;

        if (stime->in_callback)
                return 0;
        hrtimer_try_to_cancel(&stime->hrt);
        return 0;
}

static const struct snd_timer_hardware hrtimer_hw __initconst = {
        .flags = SNDRV_TIMER_HW_AUTO | SNDRV_TIMER_HW_WORK,
        .open = snd_hrtimer_open,
        .close = snd_hrtimer_close,
        .start = snd_hrtimer_start,
        .stop = snd_hrtimer_stop,
};

/*
 * entry functions
 */

static struct snd_timer *mytimer;

static int __init snd_hrtimer_init(void)
{
        struct snd_timer *timer;
        int err;

        resolution = hrtimer_resolution;

        /* Create a new timer and set up the fields */
        err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER,
                                   &timer);
        if (err < 0)
                return err;

        timer->module = THIS_MODULE;
        strcpy(timer->name, "HR timer");
        timer->hw = hrtimer_hw;
        timer->hw.resolution = resolution;
        timer->hw.ticks = NANO_SEC / resolution;
        timer->max_instances = 100; /* lower the limit */

        err = snd_timer_global_register(timer);
        if (err < 0) {
                snd_timer_global_free(timer);
                return err;
        }
        mytimer = timer; /* remember this */
        return 0;
}

static void __exit snd_hrtimer_exit(void)
{
        if (mytimer) {
                snd_timer_global_free(mytimer);
                mytimer = NULL;
        }
}

module_init(snd_hrtimer_init);
module_exit(snd_hrtimer_exit);
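Aside on snd_hrtimer_callback(): it compensates for scheduling drift by crediting missed periods, dividing the overshoot past the programmed expiry by the tick period (sticks * resolution). A minimal user-space sketch of just that arithmetic follows; the function and variable names are illustrative, not ALSA API.

/* Illustrative only: the drift-compensation arithmetic used above. */
#include <stdint.h>
#include <stdio.h>

static uint64_t ticks_for_period(uint64_t now_ns, uint64_t expires_ns,
                                 uint64_t sticks, uint64_t resolution_ns)
{
        uint64_t ticks = sticks;

        if (now_ns > expires_ns)        /* the callback fired late */
                ticks += (now_ns - expires_ns) / (sticks * resolution_ns);
        return ticks;
}

int main(void)
{
        /* period = 10 ticks of 1 ms; callback ran 25 ms past expiry */
        printf("%llu\n", (unsigned long long)
               ticks_for_period(1025000000ULL, 1000000000ULL, 10, 1000000ULL));
        /* prints 12: the base 10 ticks plus 2 whole missed periods */
        return 0;
}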
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/rculist.h> #include <linux/list.h> #include <linux/hash.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/bpf_local_storage.h> #include <net/bpf_sk_storage.h> #include <net/sock.h> #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> #include <linux/rcupdate_trace.h> DEFINE_BPF_STORAGE_CACHE(sk_cache); static struct bpf_local_storage_data * bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) { struct bpf_local_storage *sk_storage; struct bpf_local_storage_map *smap; sk_storage = rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); if (!sk_storage) return NULL; smap = (struct bpf_local_storage_map *)map; return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit); } static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) { struct bpf_local_storage_data *sdata; sdata = bpf_sk_storage_lookup(sk, map, false); if (!sdata) return -ENOENT; bpf_selem_unlink(SELEM(sdata), false); return 0; } /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage) goto out; bpf_local_storage_destroy(sk_storage); out: rcu_read_unlock(); migrate_enable(); } static void bpf_sk_storage_map_free(struct bpf_map *map) { bpf_local_storage_map_free(map, &sk_cache, NULL); } static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) { return bpf_local_storage_map_alloc(attr, &sk_cache, false); } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -ENOTSUPP; } static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) { struct bpf_local_storage_data *sdata; struct socket *sock; int fd, err; fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { sdata = bpf_sk_storage_lookup(sock->sk, map, true); sockfd_put(sock); return sdata ? 
sdata->data : NULL; } return ERR_PTR(err); } static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct socket *sock; int fd, err; fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { sdata = bpf_local_storage_update( sock->sk, (struct bpf_local_storage_map *)map, value, map_flags, false, GFP_ATOMIC); sockfd_put(sock); return PTR_ERR_OR_ZERO(sdata); } return err; } static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) { struct socket *sock; int fd, err; fd = *(int *)key; sock = sockfd_lookup(fd, &err); if (sock) { err = bpf_sk_storage_del(sock->sk, map); sockfd_put(sock); return err; } return err; } static struct bpf_local_storage_elem * bpf_sk_storage_clone_elem(struct sock *newsk, struct bpf_local_storage_map *smap, struct bpf_local_storage_elem *selem) { struct bpf_local_storage_elem *copy_selem; copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, false, GFP_ATOMIC); if (!copy_selem) return NULL; if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, SDATA(selem)->data, true); else copy_map_value(&smap->map, SDATA(copy_selem)->data, SDATA(selem)->data); return copy_selem; } int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) { struct bpf_local_storage *new_sk_storage = NULL; struct bpf_local_storage *sk_storage; struct bpf_local_storage_elem *selem; int ret = 0; RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); migrate_disable(); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage || hlist_empty(&sk_storage->list)) goto out; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { struct bpf_local_storage_elem *copy_selem; struct bpf_local_storage_map *smap; struct bpf_map *map; smap = rcu_dereference(SDATA(selem)->smap); if (!(smap->map.map_flags & BPF_F_CLONE)) continue; /* Note that for lockless listeners adding new element * here can race with cleanup in bpf_local_storage_map_free. * Try to grab map refcnt to make sure that it's still * alive and prevent concurrent removal. */ map = bpf_map_inc_not_zero(&smap->map); if (IS_ERR(map)) continue; copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); if (!copy_selem) { ret = -ENOMEM; bpf_map_put(map); goto out; } if (new_sk_storage) { bpf_selem_link_map(smap, copy_selem); bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); if (ret) { bpf_selem_free(copy_selem, smap, true); atomic_sub(smap->elem_size, &newsk->sk_omem_alloc); bpf_map_put(map); goto out; } new_sk_storage = rcu_dereference(copy_selem->local_storage); } bpf_map_put(map); } out: rcu_read_unlock(); migrate_enable(); /* In case of an error, don't free anything explicitly here, the * caller is responsible to call bpf_sk_storage_free. */ return ret; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) return (unsigned long)NULL; sdata = bpf_sk_storage_lookup(sk, map, true); if (sdata) return (unsigned long)sdata->data; if (flags == BPF_SK_STORAGE_GET_F_CREATE && /* Cannot add new elem to a going away sk. 
* Otherwise, the new elem may become a leak * (and also other memory issues during map * destruction). */ refcount_inc_not_zero(&sk->sk_refcnt)) { sdata = bpf_local_storage_update( sk, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); /* sk must be a fullsock (guaranteed by verifier), * so sock_gen_put() is unnecessary. */ sock_put(sk); return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; } return (unsigned long)NULL; } BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) { WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!sk || !sk_fullsock(sk)) return -EINVAL; if (refcount_inc_not_zero(&sk->sk_refcnt)) { int err; err = bpf_sk_storage_del(sk, map); sock_put(sk); return err; } return -ENOENT; } static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) { struct sock *sk = (struct sock *)owner; int optmem_max; optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); /* same check as in sock_kmalloc() */ if (size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { atomic_add(size, &sk->sk_omem_alloc); return 0; } return -ENOMEM; } static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap, void *owner, u32 size) { struct sock *sk = owner; atomic_sub(size, &sk->sk_omem_alloc); } static struct bpf_local_storage __rcu ** bpf_sk_storage_ptr(void *owner) { struct sock *sk = owner; return &sk->sk_bpf_storage; } const struct bpf_map_ops sk_storage_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bpf_local_storage_map_alloc_check, .map_alloc = bpf_sk_storage_map_alloc, .map_free = bpf_sk_storage_map_free, .map_get_next_key = notsupp_get_next_key, .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_local_storage_map_check_btf, .map_btf_id = &bpf_local_storage_map_btf_id[0], .map_local_storage_charge = bpf_sk_storage_charge, .map_local_storage_uncharge = bpf_sk_storage_uncharge, .map_owner_storage_ptr = bpf_sk_storage_ptr, .map_mem_usage = bpf_local_storage_map_mem_usage, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { .func = bpf_sk_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { .func = bpf_sk_storage_get, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_sk_storage_delete_proto = { .func = bpf_sk_storage_delete, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, }; static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) { if (prog->aux->dst_prog) return false; /* Ensure the tracing program is not tracing * any bpf_sk_storage*() function and also * use the bpf_sk_storage_(get|delete) helper. 
*/ switch (prog->expected_attach_type) { case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: /* bpf_sk_storage has no trace point */ return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: return false; } return false; } /* *gfp_flags* is a hidden argument provided by the verifier */ BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags, gfp_t, gfp_flags) { WARN_ON_ONCE(!bpf_rcu_lock_held()); if (in_hardirq() || in_nmi()) return (unsigned long)NULL; return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags, gfp_flags); } BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, struct sock *, sk) { WARN_ON_ONCE(!bpf_rcu_lock_held()); if (in_hardirq() || in_nmi()) return -EPERM; return ____bpf_sk_storage_delete(map, sk); } const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { .func = bpf_sk_storage_get_tracing, .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, .allowed = bpf_sk_storage_tracing_allowed, }; const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { .func = bpf_sk_storage_delete_tracing, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .allowed = bpf_sk_storage_tracing_allowed, }; struct bpf_sk_storage_diag { u32 nr_maps; struct bpf_map *maps[]; }; /* The reply will be like: * INET_DIAG_BPF_SK_STORAGES (nla_nest) * SK_DIAG_BPF_STORAGE (nla_nest) * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) * SK_DIAG_BPF_STORAGE (nla_nest) * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) * .... */ static int nla_value_size(u32 value_size) { /* SK_DIAG_BPF_STORAGE (nla_nest) * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) */ return nla_total_size(0) + nla_total_size(sizeof(u32)) + nla_total_size_64bit(value_size); } void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) { u32 i; if (!diag) return; for (i = 0; i < diag->nr_maps; i++) bpf_map_put(diag->maps[i]); kfree(diag); } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free); static bool diag_check_dup(const struct bpf_sk_storage_diag *diag, const struct bpf_map *map) { u32 i; for (i = 0; i < diag->nr_maps; i++) { if (diag->maps[i] == map) return true; } return false; } struct bpf_sk_storage_diag * bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) { struct bpf_sk_storage_diag *diag; struct nlattr *nla; u32 nr_maps = 0; int rem, err; /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as * the map_alloc_check() side also does. 
*/ if (!bpf_capable()) return ERR_PTR(-EPERM); nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, nla_stgs, rem) { if (nla_len(nla) != sizeof(u32)) return ERR_PTR(-EINVAL); nr_maps++; } diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); if (!diag) return ERR_PTR(-ENOMEM); nla_for_each_nested_type(nla, SK_DIAG_BPF_STORAGE_REQ_MAP_FD, nla_stgs, rem) { int map_fd = nla_get_u32(nla); struct bpf_map *map = bpf_map_get(map_fd); if (IS_ERR(map)) { err = PTR_ERR(map); goto err_free; } if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) { bpf_map_put(map); err = -EINVAL; goto err_free; } if (diag_check_dup(diag, map)) { bpf_map_put(map); err = -EEXIST; goto err_free; } diag->maps[diag->nr_maps++] = map; } return diag; err_free: bpf_sk_storage_diag_free(diag); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) { struct nlattr *nla_stg, *nla_value; struct bpf_local_storage_map *smap; /* It cannot exceed max nlattr's payload */ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); if (!nla_stg) return -EMSGSIZE; smap = rcu_dereference(sdata->smap); if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) goto errout; nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE, smap->map.value_size, SK_DIAG_BPF_STORAGE_PAD); if (!nla_value) goto errout; if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) copy_map_value_locked(&smap->map, nla_data(nla_value), sdata->data, true); else copy_map_value(&smap->map, nla_data(nla_value), sdata->data); nla_nest_end(skb, nla_stg); return 0; errout: nla_nest_cancel(skb, nla_stg); return -EMSGSIZE; } static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, int stg_array_type, unsigned int *res_diag_size) { /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ unsigned int diag_size = nla_total_size(0); struct bpf_local_storage *sk_storage; struct bpf_local_storage_elem *selem; struct bpf_local_storage_map *smap; struct nlattr *nla_stgs; unsigned int saved_len; int err = 0; rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage || hlist_empty(&sk_storage->list)) { rcu_read_unlock(); return 0; } nla_stgs = nla_nest_start(skb, stg_array_type); if (!nla_stgs) /* Continue to learn diag_size */ err = -EMSGSIZE; saved_len = skb->len; hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { smap = rcu_dereference(SDATA(selem)->smap); diag_size += nla_value_size(smap->map.value_size); if (nla_stgs && diag_get(SDATA(selem), skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } rcu_read_unlock(); if (nla_stgs) { if (saved_len == skb->len) nla_nest_cancel(skb, nla_stgs); else nla_nest_end(skb, nla_stgs); } if (diag_size == nla_total_size(0)) { *res_diag_size = 0; return 0; } *res_diag_size = diag_size; return err; } int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, struct sock *sk, struct sk_buff *skb, int stg_array_type, unsigned int *res_diag_size) { /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ unsigned int diag_size = nla_total_size(0); struct bpf_local_storage *sk_storage; struct bpf_local_storage_data *sdata; struct nlattr *nla_stgs; unsigned int saved_len; int err = 0; u32 i; *res_diag_size = 0; /* No map has been specified. Dump all. 
*/ if (!diag->nr_maps) return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type, res_diag_size); rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage || hlist_empty(&sk_storage->list)) { rcu_read_unlock(); return 0; } nla_stgs = nla_nest_start(skb, stg_array_type); if (!nla_stgs) /* Continue to learn diag_size */ err = -EMSGSIZE; saved_len = skb->len; for (i = 0; i < diag->nr_maps; i++) { sdata = bpf_local_storage_lookup(sk_storage, (struct bpf_local_storage_map *)diag->maps[i], false); if (!sdata) continue; diag_size += nla_value_size(diag->maps[i]->value_size); if (nla_stgs && diag_get(sdata, skb)) /* Continue to learn diag_size */ err = -EMSGSIZE; } rcu_read_unlock(); if (nla_stgs) { if (saved_len == skb->len) nla_nest_cancel(skb, nla_stgs); else nla_nest_end(skb, nla_stgs); } if (diag_size == nla_total_size(0)) { *res_diag_size = 0; return 0; } *res_diag_size = diag_size; return err; } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); struct bpf_iter_seq_sk_storage_map_info { struct bpf_map *map; unsigned int bucket_id; unsigned skip_elems; }; static struct bpf_local_storage_elem * bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, struct bpf_local_storage_elem *prev_selem) __acquires(RCU) __releases(RCU) { struct bpf_local_storage *sk_storage; struct bpf_local_storage_elem *selem; u32 skip_elems = info->skip_elems; struct bpf_local_storage_map *smap; u32 bucket_id = info->bucket_id; u32 i, count, n_buckets; struct bpf_local_storage_map_bucket *b; smap = (struct bpf_local_storage_map *)info->map; n_buckets = 1U << smap->bucket_log; if (bucket_id >= n_buckets) return NULL; /* try to find next selem in the same bucket */ selem = prev_selem; count = 0; while (selem) { selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)), struct bpf_local_storage_elem, map_node); if (!selem) { /* not found, unlock and go to the next bucket */ b = &smap->buckets[bucket_id++]; rcu_read_unlock(); skip_elems = 0; break; } sk_storage = rcu_dereference(selem->local_storage); if (sk_storage) { info->skip_elems = skip_elems + count; return selem; } count++; } for (i = bucket_id; i < (1U << smap->bucket_log); i++) { b = &smap->buckets[i]; rcu_read_lock(); count = 0; hlist_for_each_entry_rcu(selem, &b->list, map_node) { sk_storage = rcu_dereference(selem->local_storage); if (sk_storage && count >= skip_elems) { info->bucket_id = i; info->skip_elems = count; return selem; } count++; } rcu_read_unlock(); skip_elems = 0; } info->bucket_id = i; info->skip_elems = 0; return NULL; } static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_local_storage_elem *selem; selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); if (!selem) return NULL; if (*pos == 0) ++*pos; return selem; } static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_sk_storage_map_info *info = seq->private; ++*pos; ++info->skip_elems; return bpf_sk_storage_map_seq_find_next(seq->private, v); } struct bpf_iter__bpf_sk_storage_map { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct bpf_map *, map); __bpf_md_ptr(struct sock *, sk); __bpf_md_ptr(void *, value); }; DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, struct bpf_map *map, struct sock *sk, void *value) static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, struct bpf_local_storage_elem *selem) { struct bpf_iter_seq_sk_storage_map_info *info = seq->private; struct 
bpf_iter__bpf_sk_storage_map ctx = {}; struct bpf_local_storage *sk_storage; struct bpf_iter_meta meta; struct bpf_prog *prog; int ret = 0; meta.seq = seq; prog = bpf_iter_get_info(&meta, selem == NULL); if (prog) { ctx.meta = &meta; ctx.map = info->map; if (selem) { sk_storage = rcu_dereference(selem->local_storage); ctx.sk = sk_storage->owner; ctx.value = SDATA(selem)->data; } ret = bpf_iter_run_prog(prog, &ctx); } return ret; } static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) { return __bpf_sk_storage_map_seq_show(seq, v); } static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { if (!v) (void)__bpf_sk_storage_map_seq_show(seq, v); else rcu_read_unlock(); } static int bpf_iter_init_sk_storage_map(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; bpf_map_inc_with_uref(aux->map); seq_info->map = aux->map; return 0; } static void bpf_iter_fini_sk_storage_map(void *priv_data) { struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; bpf_map_put_with_uref(seq_info->map); } static int bpf_iter_attach_map(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux) { struct bpf_map *map; int err = -EINVAL; if (!linfo->map.map_fd) return -EBADF; map = bpf_map_get_with_uref(linfo->map.map_fd); if (IS_ERR(map)) return PTR_ERR(map); if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) goto put_map; if (prog->aux->max_rdwr_access > map->value_size) { err = -EACCES; goto put_map; } aux->map = map; return 0; put_map: bpf_map_put_with_uref(map); return err; } static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) { bpf_map_put_with_uref(aux->map); } static const struct seq_operations bpf_sk_storage_map_seq_ops = { .start = bpf_sk_storage_map_seq_start, .next = bpf_sk_storage_map_seq_next, .stop = bpf_sk_storage_map_seq_stop, .show = bpf_sk_storage_map_seq_show, }; static const struct bpf_iter_seq_info iter_seq_info = { .seq_ops = &bpf_sk_storage_map_seq_ops, .init_seq_private = bpf_iter_init_sk_storage_map, .fini_seq_private = bpf_iter_fini_sk_storage_map, .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), }; static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { .target = "bpf_sk_storage_map", .attach_target = bpf_iter_attach_map, .detach_target = bpf_iter_detach_map, .show_fdinfo = bpf_iter_map_show_fdinfo, .fill_link_info = bpf_iter_map_fill_link_info, .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), PTR_TO_BTF_ID_OR_NULL }, { offsetof(struct bpf_iter__bpf_sk_storage_map, value), PTR_TO_BUF | PTR_MAYBE_NULL }, }, .seq_info = &iter_seq_info, }; static int __init bpf_sk_storage_map_iter_init(void) { bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK]; return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); } late_initcall(bpf_sk_storage_map_iter_init); |
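Aside on how the map type implemented above is consumed: bpf_sk_storage_get() with BPF_SK_STORAGE_GET_F_CREATE lazily allocates per-socket storage on first access. The following is a hedged sketch of the BPF-program side in libbpf C conventions; the map name, value struct, and hook choice are illustrative, and the includes assume a vmlinux.h/libbpf build setup rather than anything defined in this file.

// SPDX-License-Identifier: GPL-2.0
/* Illustrative BPF program, not part of bpf_sk_storage.c. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct sock_note {
        __u64 created_at;       /* example per-socket value */
};

struct {
        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);   /* required for sk_storage */
        __type(key, int);
        __type(value, struct sock_note);
} sk_stg SEC(".maps");

SEC("cgroup/sock_create")
int note_sock_create(struct bpf_sock *sk)
{
        struct sock_note *val;

        /* create-on-miss: allocates this socket's storage lazily */
        val = bpf_sk_storage_get(&sk_stg, sk, NULL,
                                 BPF_SK_STORAGE_GET_F_CREATE);
        if (val)
                val->created_at = bpf_ktime_get_ns();
        return 1;       /* allow the socket */
}

char _license[] SEC("license") = "GPL";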
/*
   RFCOMM implementation for Linux Bluetooth stack (BlueZ)
   Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com>
   Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE
   IS DISCLAIMED.
*/ #include <linux/refcount.h> #ifndef __RFCOMM_H #define __RFCOMM_H #define RFCOMM_CONN_TIMEOUT (HZ * 30) #define RFCOMM_DISC_TIMEOUT (HZ * 20) #define RFCOMM_AUTH_TIMEOUT (HZ * 25) #define RFCOMM_IDLE_TIMEOUT (HZ * 2) #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 #define RFCOMM_MAX_CREDITS 40 #define RFCOMM_SKB_HEAD_RESERVE 8 #define RFCOMM_SKB_TAIL_RESERVE 2 #define RFCOMM_SKB_RESERVE (RFCOMM_SKB_HEAD_RESERVE + RFCOMM_SKB_TAIL_RESERVE) #define RFCOMM_SABM 0x2f #define RFCOMM_DISC 0x43 #define RFCOMM_UA 0x63 #define RFCOMM_DM 0x0f #define RFCOMM_UIH 0xef #define RFCOMM_TEST 0x08 #define RFCOMM_FCON 0x28 #define RFCOMM_FCOFF 0x18 #define RFCOMM_MSC 0x38 #define RFCOMM_RPN 0x24 #define RFCOMM_RLS 0x14 #define RFCOMM_PN 0x20 #define RFCOMM_NSC 0x04 #define RFCOMM_V24_FC 0x02 #define RFCOMM_V24_RTC 0x04 #define RFCOMM_V24_RTR 0x08 #define RFCOMM_V24_IC 0x40 #define RFCOMM_V24_DV 0x80 #define RFCOMM_RPN_BR_2400 0x0 #define RFCOMM_RPN_BR_4800 0x1 #define RFCOMM_RPN_BR_7200 0x2 #define RFCOMM_RPN_BR_9600 0x3 #define RFCOMM_RPN_BR_19200 0x4 #define RFCOMM_RPN_BR_38400 0x5 #define RFCOMM_RPN_BR_57600 0x6 #define RFCOMM_RPN_BR_115200 0x7 #define RFCOMM_RPN_BR_230400 0x8 #define RFCOMM_RPN_DATA_5 0x0 #define RFCOMM_RPN_DATA_6 0x1 #define RFCOMM_RPN_DATA_7 0x2 #define RFCOMM_RPN_DATA_8 0x3 #define RFCOMM_RPN_STOP_1 0 #define RFCOMM_RPN_STOP_15 1 #define RFCOMM_RPN_PARITY_NONE 0x0 #define RFCOMM_RPN_PARITY_ODD 0x1 #define RFCOMM_RPN_PARITY_EVEN 0x3 #define RFCOMM_RPN_PARITY_MARK 0x5 #define RFCOMM_RPN_PARITY_SPACE 0x7 #define RFCOMM_RPN_FLOW_NONE 0x00 #define RFCOMM_RPN_XON_CHAR 0x11 #define RFCOMM_RPN_XOFF_CHAR 0x13 #define RFCOMM_RPN_PM_BITRATE 0x0001 #define RFCOMM_RPN_PM_DATA 0x0002 #define RFCOMM_RPN_PM_STOP 0x0004 #define RFCOMM_RPN_PM_PARITY 0x0008 #define RFCOMM_RPN_PM_PARITY_TYPE 0x0010 #define RFCOMM_RPN_PM_XON 0x0020 #define RFCOMM_RPN_PM_XOFF 0x0040 #define RFCOMM_RPN_PM_FLOW 0x3F00 #define RFCOMM_RPN_PM_ALL 0x3F7F struct rfcomm_hdr { u8 addr; u8 ctrl; u8 len; /* Actual size can be 2 bytes */ } __packed; struct rfcomm_cmd { u8 addr; u8 ctrl; u8 len; u8 fcs; } __packed; struct rfcomm_mcc { u8 type; u8 len; } __packed; struct rfcomm_pn { u8 dlci; u8 flow_ctrl; u8 priority; u8 ack_timer; __le16 mtu; u8 max_retrans; u8 credits; } __packed; struct rfcomm_rpn { u8 dlci; u8 bit_rate; u8 line_settings; u8 flow_ctrl; u8 xon_char; u8 xoff_char; __le16 param_mask; } __packed; struct rfcomm_rls { u8 dlci; u8 status; } __packed; struct rfcomm_msc { u8 dlci; u8 v24_sig; } __packed; /* ---- Core structures, flags etc ---- */ struct rfcomm_session { struct list_head list; struct socket *sock; struct timer_list timer; unsigned long state; unsigned long flags; int initiator; /* Default DLC parameters */ int cfc; uint mtu; struct list_head dlcs; }; struct rfcomm_dlc { struct list_head list; struct rfcomm_session *session; struct sk_buff_head tx_queue; struct timer_list timer; struct mutex lock; unsigned long state; unsigned long flags; refcount_t refcnt; u8 dlci; u8 addr; u8 priority; u8 v24_sig; u8 remote_v24_sig; u8 mscex; u8 out; u8 sec_level; u8 role_switch; u32 defer_setup; uint mtu; uint cfc; uint rx_credits; uint tx_credits; void *owner; void (*data_ready)(struct rfcomm_dlc *d, struct sk_buff *skb); void (*state_change)(struct rfcomm_dlc *d, int err); void (*modem_status)(struct rfcomm_dlc *d, u8 v24_sig); }; /* DLC and session flags */ #define RFCOMM_RX_THROTTLED 0 #define RFCOMM_TX_THROTTLED 1 #define RFCOMM_TIMED_OUT 2 #define RFCOMM_MSC_PENDING 3 #define RFCOMM_SEC_PENDING 4 
#define RFCOMM_AUTH_PENDING 5 #define RFCOMM_AUTH_ACCEPT 6 #define RFCOMM_AUTH_REJECT 7 #define RFCOMM_DEFER_SETUP 8 #define RFCOMM_ENC_DROP 9 /* Scheduling flags and events */ #define RFCOMM_SCHED_WAKEUP 31 /* MSC exchange flags */ #define RFCOMM_MSCEX_TX 1 #define RFCOMM_MSCEX_RX 2 #define RFCOMM_MSCEX_OK (RFCOMM_MSCEX_TX + RFCOMM_MSCEX_RX) /* CFC states */ #define RFCOMM_CFC_UNKNOWN -1 #define RFCOMM_CFC_DISABLED 0 #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS /* ---- RFCOMM SEND RPN ---- */ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 bit_rate, u8 data_bits, u8 stop_bits, u8 parity, u8 flow_ctrl_settings, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb); void rfcomm_dlc_send_noerror(struct rfcomm_dlc *d, struct sk_buff *skb); int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig); int rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig); void rfcomm_dlc_accept(struct rfcomm_dlc *d); struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel); #define rfcomm_dlc_lock(d) mutex_lock(&d->lock) #define rfcomm_dlc_unlock(d) mutex_unlock(&d->lock) static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d) { refcount_inc(&d->refcnt); } static inline void rfcomm_dlc_put(struct rfcomm_dlc *d) { if (refcount_dec_and_test(&d->refcnt)) rfcomm_dlc_free(d); } void __rfcomm_dlc_throttle(struct rfcomm_dlc *d); void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d); static inline void rfcomm_dlc_throttle(struct rfcomm_dlc *d) { if (!test_and_set_bit(RFCOMM_RX_THROTTLED, &d->flags)) __rfcomm_dlc_throttle(d); } static inline void rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) { if (test_and_clear_bit(RFCOMM_RX_THROTTLED, &d->flags)) __rfcomm_dlc_unthrottle(d); } /* ---- RFCOMM sessions ---- */ void rfcomm_session_getaddr(struct rfcomm_session *s, bdaddr_t *src, bdaddr_t *dst); /* ---- RFCOMM sockets ---- */ struct sockaddr_rc { sa_family_t rc_family; bdaddr_t rc_bdaddr; u8 rc_channel; }; #define RFCOMM_CONNINFO 0x02 struct rfcomm_conninfo { __u16 hci_handle; __u8 dev_class[3]; }; #define RFCOMM_LM 0x03 #define RFCOMM_LM_MASTER 0x0001 #define RFCOMM_LM_AUTH 0x0002 #define RFCOMM_LM_ENCRYPT 0x0004 #define RFCOMM_LM_TRUSTED 0x0008 #define RFCOMM_LM_RELIABLE 0x0010 #define RFCOMM_LM_SECURE 0x0020 #define RFCOMM_LM_FIPS 0x0040 #define rfcomm_pi(sk) ((struct rfcomm_pinfo *) sk) struct rfcomm_pinfo { struct bt_sock bt; bdaddr_t src; bdaddr_t dst; struct rfcomm_dlc *dlc; u8 channel; u8 sec_level; u8 role_switch; }; int rfcomm_init_sockets(void); void rfcomm_cleanup_sockets(void); int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc **d); /* ---- RFCOMM TTY ---- */ #define RFCOMM_MAX_DEV 256 #define RFCOMMCREATEDEV _IOW('R', 200, int) #define RFCOMMRELEASEDEV _IOW('R', 201, int) #define RFCOMMGETDEVLIST _IOR('R', 210, int) #define RFCOMMGETDEVINFO _IOR('R', 211, int) #define RFCOMMSTEALDLC _IOW('R', 220, int) /* rfcomm_dev.flags bit definitions */ #define RFCOMM_REUSE_DLC 0 #define RFCOMM_RELEASE_ONHUP 1 #define RFCOMM_HANGUP_NOW 2 #define RFCOMM_TTY_ATTACHED 3 #define RFCOMM_DEFUNCT_BIT4 4 /* don't reuse this bit - userspace visible */ /* rfcomm_dev.status bit definitions */ #define RFCOMM_DEV_RELEASED 0 #define 
RFCOMM_TTY_OWNED 1

struct rfcomm_dev_req {
	s16      dev_id;
	u32      flags;
	bdaddr_t src;
	bdaddr_t dst;
	u8       channel;
};

struct rfcomm_dev_info {
	s16      id;
	u32      flags;
	u16      state;
	bdaddr_t src;
	bdaddr_t dst;
	u8       channel;
};

struct rfcomm_dev_list_req {
	u16      dev_num;
	struct   rfcomm_dev_info dev_info[] __counted_by(dev_num);
};

int  rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);

#ifdef CONFIG_BT_RFCOMM_TTY
int  rfcomm_init_ttys(void);
void rfcomm_cleanup_ttys(void);
#else
static inline int rfcomm_init_ttys(void)
{
	return 0;
}

static inline void rfcomm_cleanup_ttys(void)
{
}
#endif

#endif /* __RFCOMM_H */
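/*
 * Editor's note -- illustrative sketch, not part of this header.  struct
 * sockaddr_rc above is also the address format exposed to userspace, so a
 * minimal RFCOMM client (built against the BlueZ userspace headers, which
 * mirror these definitions) boils down to socket() plus connect().  The
 * remote address and channel below are placeholders.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>

int rfcomm_client_example(void)
{
	struct sockaddr_rc addr = { 0 };
	int s, err;

	s = socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);
	if (s < 0)
		return -1;

	addr.rc_family = AF_BLUETOOTH;
	addr.rc_channel = 1;				/* server channels are 1..30 */
	str2ba("00:11:22:33:44:55", &addr.rc_bdaddr);	/* placeholder address */

	err = connect(s, (struct sockaddr *)&addr, sizeof(addr));
	if (!err)
		err = (write(s, "hello", 5) < 0) ? -1 : 0;

	close(s);
	return err;
}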
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * HID driver for Corsair Void headsets
 *
 * Copyright (C) 2023-2024 Stuart Hayhurst
 */

/* -------------------------------------------------------------------------- */
/* Receiver report information: (ID 100) */
/* -------------------------------------------------------------------------- */
/*
 * When queried, the receiver responds with 5 bytes to describe the battery
 * The power button, mute button and moving the mic also trigger this report
 * This includes power button + mic + connection + battery status and capacity
 * The information below may not be perfect, it's been gathered through guesses
 *
 * 0: REPORT ID
 *  100 for the battery packet
 *
 * 1: POWER BUTTON + (?)
 *  Largest bit is 1 when power button pressed
 *
 * 2: BATTERY CAPACITY + MIC STATUS
 *  Battery capacity:
 *   Seems to report ~54 higher than reality when charging
 *   Capped at 100, charging or not
 *  Microphone status:
 *   Largest bit is set to 1 when the mic is physically up
 *   No bits change when the mic is muted, only when physically moved
 *   This report is sent every time the mic is moved, no polling required
 *
 * 3: CONNECTION STATUS
 *  16: Wired headset
 *  38: Initialising
 *  49: Lost connection
 *  51: Disconnected, searching
 *  52: Disconnected, not searching
 *  177: Normal
 *
 * 4: BATTERY STATUS
 *  0: Disconnected
 *  1: Normal
 *  2: Low
 *  3: Critical - sent during shutdown
 *  4: Fully charged
 *  5: Charging
 *
 * (See the standalone parsing sketch appended after this driver.)
 */
/* -------------------------------------------------------------------------- */

/* -------------------------------------------------------------------------- */
/* Receiver report information: (ID 102) */
/* -------------------------------------------------------------------------- */
/*
 * When queried, the receiver responds with 4 bytes to describe the firmware
 * The first 2 bytes are for the receiver, the second 2 are the headset
 * The headset firmware version will be 0 if no headset is connected
 *
 * 0: Receiver firmware major version
 *  Major version of the receiver's firmware
 *
 * 1: Receiver firmware minor version
 *  Minor version of the receiver's firmware
 *
 * 2: Headset firmware major version
 *  Major version of the headset's firmware
 *
 * 3: Headset firmware minor version
 *  Minor version of the headset's firmware
 */
/* -------------------------------------------------------------------------- */

#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cleanup.h>
#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/power_supply.h>
#include <linux/usb.h>
#include <linux/workqueue.h>
#include <asm/byteorder.h>

#include "hid-ids.h"

#define CORSAIR_VOID_DEVICE(id, type)		{ HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, (id)), \
						  .driver_data = (type) }
#define CORSAIR_VOID_WIRELESS_DEVICE(id)	CORSAIR_VOID_DEVICE((id), CORSAIR_VOID_WIRELESS)
#define CORSAIR_VOID_WIRED_DEVICE(id)		CORSAIR_VOID_DEVICE((id), CORSAIR_VOID_WIRED)

#define CORSAIR_VOID_STATUS_REQUEST_ID		0xC9
#define CORSAIR_VOID_NOTIF_REQUEST_ID		0xCA
#define CORSAIR_VOID_SIDETONE_REQUEST_ID	0xFF
#define CORSAIR_VOID_STATUS_REPORT_ID		0x64
#define CORSAIR_VOID_FIRMWARE_REPORT_ID		0x66

#define CORSAIR_VOID_USB_SIDETONE_REQUEST	0x1
#define CORSAIR_VOID_USB_SIDETONE_REQUEST_TYPE	0x21
#define CORSAIR_VOID_USB_SIDETONE_VALUE		0x200
#define CORSAIR_VOID_USB_SIDETONE_INDEX		0xB00

#define CORSAIR_VOID_MIC_MASK			GENMASK(7, 7)
#define CORSAIR_VOID_CAPACITY_MASK		GENMASK(6, 0)

#define CORSAIR_VOID_WIRELESS_CONNECTED		177

#define CORSAIR_VOID_SIDETONE_MAX_WIRELESS	55
#define CORSAIR_VOID_SIDETONE_MAX_WIRED		4096

enum {
	CORSAIR_VOID_WIRELESS,
	CORSAIR_VOID_WIRED,
};

enum {
	CORSAIR_VOID_BATTERY_NORMAL	= 1,
	CORSAIR_VOID_BATTERY_LOW	= 2,
	CORSAIR_VOID_BATTERY_CRITICAL	= 3,
	CORSAIR_VOID_BATTERY_CHARGED	= 4,
	CORSAIR_VOID_BATTERY_CHARGING	= 5,
};

static
enum power_supply_property corsair_void_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_CAPACITY, POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, }; struct corsair_void_battery_data { int status; bool present; int capacity; int capacity_level; }; struct corsair_void_drvdata { struct hid_device *hid_dev; struct device *dev; char *name; bool is_wired; unsigned int sidetone_max; struct corsair_void_battery_data battery_data; bool mic_up; bool connected; int fw_receiver_major; int fw_receiver_minor; int fw_headset_major; int fw_headset_minor; struct power_supply *battery; struct power_supply_desc battery_desc; struct mutex battery_mutex; struct delayed_work delayed_status_work; struct delayed_work delayed_firmware_work; struct work_struct battery_remove_work; struct work_struct battery_add_work; }; /* * Functions to process receiver data */ static void corsair_void_set_wireless_status(struct corsair_void_drvdata *drvdata) { struct usb_interface *usb_if = to_usb_interface(drvdata->dev->parent); if (drvdata->is_wired) return; usb_set_wireless_status(usb_if, drvdata->connected ? USB_WIRELESS_STATUS_CONNECTED : USB_WIRELESS_STATUS_DISCONNECTED); } static void corsair_void_set_unknown_batt(struct corsair_void_drvdata *drvdata) { struct corsair_void_battery_data *battery_data = &drvdata->battery_data; battery_data->status = POWER_SUPPLY_STATUS_UNKNOWN; battery_data->present = false; battery_data->capacity = 0; battery_data->capacity_level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; } /* Reset data that may change between wireless connections */ static void corsair_void_set_unknown_wireless_data(struct corsair_void_drvdata *drvdata) { /* Only 0 out headset, receiver is always known if relevant */ drvdata->fw_headset_major = 0; drvdata->fw_headset_minor = 0; drvdata->connected = false; drvdata->mic_up = false; corsair_void_set_wireless_status(drvdata); } static void corsair_void_process_receiver(struct corsair_void_drvdata *drvdata, int raw_battery_capacity, int raw_connection_status, int raw_battery_status) { struct corsair_void_battery_data *battery_data = &drvdata->battery_data; struct corsair_void_battery_data orig_battery_data; /* Save initial battery data, to compare later */ orig_battery_data = *battery_data; /* Headset not connected, or it's wired */ if (raw_connection_status != CORSAIR_VOID_WIRELESS_CONNECTED) goto unknown_battery; /* Battery information unavailable */ if (raw_battery_status == 0) goto unknown_battery; /* Battery must be connected then */ battery_data->present = true; battery_data->capacity_level = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; /* Set battery status */ switch (raw_battery_status) { case CORSAIR_VOID_BATTERY_NORMAL: case CORSAIR_VOID_BATTERY_LOW: case CORSAIR_VOID_BATTERY_CRITICAL: battery_data->status = POWER_SUPPLY_STATUS_DISCHARGING; if (raw_battery_status == CORSAIR_VOID_BATTERY_LOW) battery_data->capacity_level = POWER_SUPPLY_CAPACITY_LEVEL_LOW; else if (raw_battery_status == CORSAIR_VOID_BATTERY_CRITICAL) battery_data->capacity_level = POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; break; case CORSAIR_VOID_BATTERY_CHARGED: battery_data->status = POWER_SUPPLY_STATUS_FULL; break; case CORSAIR_VOID_BATTERY_CHARGING: battery_data->status = POWER_SUPPLY_STATUS_CHARGING; break; default: hid_warn(drvdata->hid_dev, "unknown battery status '%d'", raw_battery_status); goto unknown_battery; break; } battery_data->capacity = raw_battery_capacity; 
corsair_void_set_wireless_status(drvdata); goto success; unknown_battery: corsair_void_set_unknown_batt(drvdata); success: /* Inform power supply if battery values changed */ if (memcmp(&orig_battery_data, battery_data, sizeof(*battery_data))) { scoped_guard(mutex, &drvdata->battery_mutex) { if (drvdata->battery) { power_supply_changed(drvdata->battery); } } } } /* * Functions to report stored data */ static int corsair_void_battery_get_property(struct power_supply *psy, enum power_supply_property prop, union power_supply_propval *val) { struct corsair_void_drvdata *drvdata = power_supply_get_drvdata(psy); switch (prop) { case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_MODEL_NAME: if (!strncmp(drvdata->hid_dev->name, "Corsair ", 8)) val->strval = drvdata->hid_dev->name + 8; else val->strval = drvdata->hid_dev->name; break; case POWER_SUPPLY_PROP_MANUFACTURER: val->strval = "Corsair"; break; case POWER_SUPPLY_PROP_STATUS: val->intval = drvdata->battery_data.status; break; case POWER_SUPPLY_PROP_PRESENT: val->intval = drvdata->battery_data.present; break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = drvdata->battery_data.capacity; break; case POWER_SUPPLY_PROP_CAPACITY_LEVEL: val->intval = drvdata->battery_data.capacity_level; break; default: return -EINVAL; } return 0; } static ssize_t microphone_up_show(struct device *dev, struct device_attribute *attr, char *buf) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); if (!drvdata->connected) return -ENODEV; return sysfs_emit(buf, "%d\n", drvdata->mic_up); } static ssize_t fw_version_receiver_show(struct device *dev, struct device_attribute *attr, char *buf) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); if (drvdata->fw_receiver_major == 0 && drvdata->fw_receiver_minor == 0) return -ENODATA; return sysfs_emit(buf, "%d.%02d\n", drvdata->fw_receiver_major, drvdata->fw_receiver_minor); } static ssize_t fw_version_headset_show(struct device *dev, struct device_attribute *attr, char *buf) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); if (drvdata->fw_headset_major == 0 && drvdata->fw_headset_minor == 0) return -ENODATA; return sysfs_emit(buf, "%d.%02d\n", drvdata->fw_headset_major, drvdata->fw_headset_minor); } static ssize_t sidetone_max_show(struct device *dev, struct device_attribute *attr, char *buf) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); return sysfs_emit(buf, "%d\n", drvdata->sidetone_max); } /* * Functions to send data to headset */ static ssize_t send_alert_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); struct hid_device *hid_dev = drvdata->hid_dev; unsigned char alert_id; unsigned char *send_buf __free(kfree) = NULL; int ret; if (!drvdata->connected || drvdata->is_wired) return -ENODEV; /* Only accept 0 or 1 for alert ID */ if (kstrtou8(buf, 10, &alert_id) || alert_id >= 2) return -EINVAL; send_buf = kmalloc(3, GFP_KERNEL); if (!send_buf) return -ENOMEM; /* Packet format to send alert with ID alert_id */ send_buf[0] = CORSAIR_VOID_NOTIF_REQUEST_ID; send_buf[1] = 0x02; send_buf[2] = alert_id; ret = hid_hw_raw_request(hid_dev, CORSAIR_VOID_NOTIF_REQUEST_ID, send_buf, 3, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); if (ret < 0) hid_warn(hid_dev, "failed to send alert request (reason: %d)", ret); else ret = count; return ret; } static int corsair_void_set_sidetone_wired(struct device *dev, const char *buf, unsigned int 
sidetone) { struct usb_interface *usb_if = to_usb_interface(dev->parent); struct usb_device *usb_dev = interface_to_usbdev(usb_if); /* Packet format to set sidetone for wired headsets */ __le16 sidetone_le = cpu_to_le16(sidetone); return usb_control_msg_send(usb_dev, 0, CORSAIR_VOID_USB_SIDETONE_REQUEST, CORSAIR_VOID_USB_SIDETONE_REQUEST_TYPE, CORSAIR_VOID_USB_SIDETONE_VALUE, CORSAIR_VOID_USB_SIDETONE_INDEX, &sidetone_le, 2, USB_CTRL_SET_TIMEOUT, GFP_KERNEL); } static int corsair_void_set_sidetone_wireless(struct device *dev, const char *buf, unsigned char sidetone) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); struct hid_device *hid_dev = drvdata->hid_dev; unsigned char *send_buf __free(kfree) = NULL; send_buf = kmalloc(12, GFP_KERNEL); if (!send_buf) return -ENOMEM; /* Packet format to set sidetone for wireless headsets */ send_buf[0] = CORSAIR_VOID_SIDETONE_REQUEST_ID; send_buf[1] = 0x0B; send_buf[2] = 0x00; send_buf[3] = 0xFF; send_buf[4] = 0x04; send_buf[5] = 0x0E; send_buf[6] = 0xFF; send_buf[7] = 0x05; send_buf[8] = 0x01; send_buf[9] = 0x04; send_buf[10] = 0x00; send_buf[11] = sidetone + 200; return hid_hw_raw_request(hid_dev, CORSAIR_VOID_SIDETONE_REQUEST_ID, send_buf, 12, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); } static ssize_t set_sidetone_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct corsair_void_drvdata *drvdata = dev_get_drvdata(dev); struct hid_device *hid_dev = drvdata->hid_dev; unsigned int sidetone; int ret; if (!drvdata->connected) return -ENODEV; /* sidetone must be between 0 and drvdata->sidetone_max inclusive */ if (kstrtouint(buf, 10, &sidetone) || sidetone > drvdata->sidetone_max) return -EINVAL; if (drvdata->is_wired) ret = corsair_void_set_sidetone_wired(dev, buf, sidetone); else ret = corsair_void_set_sidetone_wireless(dev, buf, sidetone); if (ret < 0) hid_warn(hid_dev, "failed to send sidetone (reason: %d)", ret); else ret = count; return ret; } static int corsair_void_request_status(struct hid_device *hid_dev, int id) { unsigned char *send_buf __free(kfree) = NULL; send_buf = kmalloc(2, GFP_KERNEL); if (!send_buf) return -ENOMEM; /* Packet format to request data item (status / firmware) refresh */ send_buf[0] = CORSAIR_VOID_STATUS_REQUEST_ID; send_buf[1] = id; /* Send request for data refresh */ return hid_hw_raw_request(hid_dev, CORSAIR_VOID_STATUS_REQUEST_ID, send_buf, 2, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); } /* * Headset connect / disconnect handlers and work handlers */ static void corsair_void_status_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; struct delayed_work *delayed_work; int battery_ret; delayed_work = container_of(work, struct delayed_work, work); drvdata = container_of(delayed_work, struct corsair_void_drvdata, delayed_status_work); battery_ret = corsair_void_request_status(drvdata->hid_dev, CORSAIR_VOID_STATUS_REPORT_ID); if (battery_ret < 0) { hid_warn(drvdata->hid_dev, "failed to request battery (reason: %d)", battery_ret); } } static void corsair_void_firmware_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; struct delayed_work *delayed_work; int firmware_ret; delayed_work = container_of(work, struct delayed_work, work); drvdata = container_of(delayed_work, struct corsair_void_drvdata, delayed_firmware_work); firmware_ret = corsair_void_request_status(drvdata->hid_dev, CORSAIR_VOID_FIRMWARE_REPORT_ID); if (firmware_ret < 0) { hid_warn(drvdata->hid_dev, "failed to request firmware (reason: %d)", firmware_ret); 
} } static void corsair_void_battery_remove_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; drvdata = container_of(work, struct corsair_void_drvdata, battery_remove_work); scoped_guard(mutex, &drvdata->battery_mutex) { if (drvdata->battery) { power_supply_unregister(drvdata->battery); drvdata->battery = NULL; } } } static void corsair_void_battery_add_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; drvdata = container_of(work, struct corsair_void_drvdata, battery_add_work); guard(mutex)(&drvdata->battery_mutex); if (drvdata->battery) return; psy_cfg.drv_data = drvdata; new_supply = power_supply_register(drvdata->dev, &drvdata->battery_desc, &psy_cfg); if (IS_ERR(new_supply)) { hid_err(drvdata->hid_dev, "failed to register battery '%s' (reason: %ld)\n", drvdata->battery_desc.name, PTR_ERR(new_supply)); return; } if (power_supply_powers(new_supply, drvdata->dev)) { power_supply_unregister(new_supply); return; } drvdata->battery = new_supply; } static void corsair_void_headset_connected(struct corsair_void_drvdata *drvdata) { schedule_work(&drvdata->battery_add_work); schedule_delayed_work(&drvdata->delayed_firmware_work, msecs_to_jiffies(100)); } static void corsair_void_headset_disconnected(struct corsair_void_drvdata *drvdata) { schedule_work(&drvdata->battery_remove_work); corsair_void_set_unknown_wireless_data(drvdata); corsair_void_set_unknown_batt(drvdata); } /* * Driver setup, probing and HID event handling */ static DEVICE_ATTR_RO(fw_version_receiver); static DEVICE_ATTR_RO(fw_version_headset); static DEVICE_ATTR_RO(microphone_up); static DEVICE_ATTR_RO(sidetone_max); static DEVICE_ATTR_WO(send_alert); static DEVICE_ATTR_WO(set_sidetone); static struct attribute *corsair_void_attrs[] = { &dev_attr_fw_version_receiver.attr, &dev_attr_fw_version_headset.attr, &dev_attr_microphone_up.attr, &dev_attr_send_alert.attr, &dev_attr_set_sidetone.attr, &dev_attr_sidetone_max.attr, NULL, }; static const struct attribute_group corsair_void_attr_group = { .attrs = corsair_void_attrs, }; static int corsair_void_probe(struct hid_device *hid_dev, const struct hid_device_id *hid_id) { int ret; struct corsair_void_drvdata *drvdata; char *name; if (!hid_is_usb(hid_dev)) return -EINVAL; drvdata = devm_kzalloc(&hid_dev->dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; hid_set_drvdata(hid_dev, drvdata); dev_set_drvdata(&hid_dev->dev, drvdata); drvdata->dev = &hid_dev->dev; drvdata->hid_dev = hid_dev; drvdata->is_wired = hid_id->driver_data == CORSAIR_VOID_WIRED; drvdata->sidetone_max = CORSAIR_VOID_SIDETONE_MAX_WIRELESS; if (drvdata->is_wired) drvdata->sidetone_max = CORSAIR_VOID_SIDETONE_MAX_WIRED; /* Set initial values for no wireless headset attached */ /* If a headset is attached, it'll be prompted later */ corsair_void_set_unknown_wireless_data(drvdata); corsair_void_set_unknown_batt(drvdata); /* Receiver version won't be reset after init */ /* Headset version already set via set_unknown_wireless_data */ drvdata->fw_receiver_major = 0; drvdata->fw_receiver_minor = 0; ret = hid_parse(hid_dev); if (ret) { hid_err(hid_dev, "parse failed (reason: %d)\n", ret); return ret; } name = devm_kasprintf(drvdata->dev, GFP_KERNEL, "corsair-void-%d-battery", hid_dev->id); if (!name) return -ENOMEM; drvdata->battery_desc.name = name; drvdata->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; drvdata->battery_desc.properties = corsair_void_battery_props; 
drvdata->battery_desc.num_properties = ARRAY_SIZE(corsair_void_battery_props); drvdata->battery_desc.get_property = corsair_void_battery_get_property; drvdata->battery = NULL; INIT_WORK(&drvdata->battery_remove_work, corsair_void_battery_remove_work_handler); INIT_WORK(&drvdata->battery_add_work, corsair_void_battery_add_work_handler); ret = devm_mutex_init(drvdata->dev, &drvdata->battery_mutex); if (ret) return ret; ret = sysfs_create_group(&hid_dev->dev.kobj, &corsair_void_attr_group); if (ret) return ret; /* Any failures after here will need to call hid_hw_stop */ ret = hid_hw_start(hid_dev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hid_dev, "hid_hw_start failed (reason: %d)\n", ret); goto failed_after_sysfs; } /* Refresh battery data, in case wireless headset is already connected */ INIT_DELAYED_WORK(&drvdata->delayed_status_work, corsair_void_status_work_handler); schedule_delayed_work(&drvdata->delayed_status_work, msecs_to_jiffies(100)); /* Refresh firmware versions */ INIT_DELAYED_WORK(&drvdata->delayed_firmware_work, corsair_void_firmware_work_handler); schedule_delayed_work(&drvdata->delayed_firmware_work, msecs_to_jiffies(100)); return 0; failed_after_sysfs: sysfs_remove_group(&hid_dev->dev.kobj, &corsair_void_attr_group); return ret; } static void corsair_void_remove(struct hid_device *hid_dev) { struct corsair_void_drvdata *drvdata = hid_get_drvdata(hid_dev); hid_hw_stop(hid_dev); cancel_work_sync(&drvdata->battery_remove_work); cancel_work_sync(&drvdata->battery_add_work); if (drvdata->battery) power_supply_unregister(drvdata->battery); cancel_delayed_work_sync(&drvdata->delayed_status_work); cancel_delayed_work_sync(&drvdata->delayed_firmware_work); sysfs_remove_group(&hid_dev->dev.kobj, &corsair_void_attr_group); } static int corsair_void_raw_event(struct hid_device *hid_dev, struct hid_report *hid_report, u8 *data, int size) { struct corsair_void_drvdata *drvdata = hid_get_drvdata(hid_dev); bool was_connected = drvdata->connected; /* Description of packets are documented at the top of this file */ if (hid_report->id == CORSAIR_VOID_STATUS_REPORT_ID) { drvdata->mic_up = FIELD_GET(CORSAIR_VOID_MIC_MASK, data[2]); drvdata->connected = (data[3] == CORSAIR_VOID_WIRELESS_CONNECTED) || drvdata->is_wired; corsair_void_process_receiver(drvdata, FIELD_GET(CORSAIR_VOID_CAPACITY_MASK, data[2]), data[3], data[4]); } else if (hid_report->id == CORSAIR_VOID_FIRMWARE_REPORT_ID) { drvdata->fw_receiver_major = data[1]; drvdata->fw_receiver_minor = data[2]; drvdata->fw_headset_major = data[3]; drvdata->fw_headset_minor = data[4]; } /* Handle wireless headset connect / disconnect */ if ((was_connected != drvdata->connected) && !drvdata->is_wired) { if (drvdata->connected) corsair_void_headset_connected(drvdata); else corsair_void_headset_disconnected(drvdata); } return 0; } static const struct hid_device_id corsair_void_devices[] = { /* Corsair Void Wireless */ CORSAIR_VOID_WIRELESS_DEVICE(0x0a0c), CORSAIR_VOID_WIRELESS_DEVICE(0x0a2b), CORSAIR_VOID_WIRELESS_DEVICE(0x1b23), CORSAIR_VOID_WIRELESS_DEVICE(0x1b25), CORSAIR_VOID_WIRELESS_DEVICE(0x1b27), /* Corsair Void USB */ CORSAIR_VOID_WIRED_DEVICE(0x0a0f), CORSAIR_VOID_WIRED_DEVICE(0x1b1c), CORSAIR_VOID_WIRED_DEVICE(0x1b29), CORSAIR_VOID_WIRED_DEVICE(0x1b2a), /* Corsair Void Surround */ CORSAIR_VOID_WIRED_DEVICE(0x0a30), CORSAIR_VOID_WIRED_DEVICE(0x0a31), /* Corsair Void Pro Wireless */ CORSAIR_VOID_WIRELESS_DEVICE(0x0a14), CORSAIR_VOID_WIRELESS_DEVICE(0x0a16), CORSAIR_VOID_WIRELESS_DEVICE(0x0a1a), /* Corsair Void Pro USB */ 
	CORSAIR_VOID_WIRED_DEVICE(0x0a17),
	CORSAIR_VOID_WIRED_DEVICE(0x0a1d),

	/* Corsair Void Pro Surround */
	CORSAIR_VOID_WIRED_DEVICE(0x0a18),
	CORSAIR_VOID_WIRED_DEVICE(0x0a1e),
	CORSAIR_VOID_WIRED_DEVICE(0x0a1f),

	/* Corsair Void Elite Wireless */
	CORSAIR_VOID_WIRELESS_DEVICE(0x0a51),
	CORSAIR_VOID_WIRELESS_DEVICE(0x0a55),
	CORSAIR_VOID_WIRELESS_DEVICE(0x0a75),

	/* Corsair Void Elite USB */
	CORSAIR_VOID_WIRED_DEVICE(0x0a52),
	CORSAIR_VOID_WIRED_DEVICE(0x0a56),

	/* Corsair Void Elite Surround */
	CORSAIR_VOID_WIRED_DEVICE(0x0a53),
	CORSAIR_VOID_WIRED_DEVICE(0x0a57),

	{}
};
MODULE_DEVICE_TABLE(hid, corsair_void_devices);

static struct hid_driver corsair_void_driver = {
	.name = "hid-corsair-void",
	.id_table = corsair_void_devices,
	.probe = corsair_void_probe,
	.remove = corsair_void_remove,
	.raw_event = corsair_void_raw_event,
};

module_hid_driver(corsair_void_driver);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stuart Hayhurst <stuart.a.hayhurst@gmail.com>");
MODULE_DESCRIPTION("HID driver for Corsair Void headsets");
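/*
 * Editor's note -- illustrative sketch, not part of the driver above.  The
 * report-ID 100 byte layout documented at the top of this file can be read
 * with a tiny standalone parser; the struct and function names here are
 * invented for the example, and the field positions simply restate that
 * documentation (byte 2 = mic bit + capacity, byte 3 = connection status,
 * byte 4 = battery status).
 */
#include <stdbool.h>
#include <stdint.h>

struct void_battery_report {
	bool    mic_up;		/* byte 2, bit 7 */
	uint8_t capacity;	/* byte 2, bits 6-0 (reads high while charging) */
	bool    connected;	/* byte 3 == 177 for a normal wireless link */
	uint8_t status;		/* byte 4: 0 disconnected ... 5 charging */
};

/* data points at a 5-byte ID 100 report, so data[0] == 100 */
static inline void parse_void_battery_report(const uint8_t data[5],
					     struct void_battery_report *out)
{
	out->mic_up    = data[2] & 0x80;
	out->capacity  = data[2] & 0x7f;
	out->connected = (data[3] == 177);
	out->status    = data[4];
}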
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H

#include <linux/refcount.h>
#include <linux/io_uring_types.h>

struct io_wq;

enum {
	IO_WQ_WORK_CANCEL	= 1,
	IO_WQ_WORK_HASHED	= 2,
	IO_WQ_WORK_UNBOUND	= 4,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
};

enum io_wq_cancel {
	IO_WQ_CANCEL_OK,	/* cancelled before started */
	IO_WQ_CANCEL_RUNNING,	/* found, running, and attempted cancelled */
	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
};

typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work *);

struct io_wq_hash {
	refcount_t refs;
	unsigned long map;
	struct wait_queue_head wait;
};

static inline void io_wq_put_hash(struct io_wq_hash *hash)
{
	if (refcount_dec_and_test(&hash->refs))
		kfree(hash);
}

struct io_wq_data {
	struct io_wq_hash *hash;
	struct task_struct *task;
	io_wq_work_fn *do_work;
	free_work_fn *free_work;
};

struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);

void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);

int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
bool io_wq_worker_stopped(void);

static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
	return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
}

typedef bool (work_cancel_fn)(struct io_wq_work *, void *);

enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
					void *data, bool cancel_all);

#if defined(CONFIG_IO_WQ)
extern void io_wq_worker_sleeping(struct task_struct *);
extern void io_wq_worker_running(struct task_struct *);
#else
static inline void io_wq_worker_sleeping(struct task_struct *tsk)
{
}
static inline void io_wq_worker_running(struct task_struct *tsk)
{
}
#endif

static inline bool io_wq_current_is_worker(void)
{
	return in_task() && (current->flags & PF_IO_WORKER) &&
		current->worker_private;
}
#endif
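/*
 * Editor's note -- illustrative sketch, not part of the header above.  Work
 * items hashed against the same key are serialised by io-wq rather than run
 * concurrently, which is what io_wq_hash_work()/IO_WQ_WORK_HASHED are for.
 * A caller wanting all work that touches one object (for example one inode)
 * to execute in order could wrap enqueue like this; the helper name is
 * invented, and wq, work and key are placeholders for the caller's own state.
 */
static inline void io_wq_enqueue_serialised(struct io_wq *wq,
					    struct io_wq_work *work,
					    void *key)
{
	/* fold the key into work->flags above IO_WQ_HASH_SHIFT */
	io_wq_hash_work(work, key);

	/* io_wq_is_hashed(work) is now true; enqueue as normal */
	io_wq_enqueue(wq, work);
}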
// SPDX-License-Identifier: GPL-2.0
/*
 * USB 7 Segment Driver
 *
 * Copyright (C) 2008 Harrison Metzger <harrisonmetz@gmail.com>
 * Based on usbled.c by Greg Kroah-Hartman (greg@kroah.com)
 */

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/usb.h>

#define DRIVER_AUTHOR "Harrison Metzger <harrisonmetz@gmail.com>"
#define DRIVER_DESC "USB 7 Segment Driver"

#define VENDOR_ID	0x0fc5
#define PRODUCT_ID	0x1227
#define MAXLEN		8

/* table of devices that work with this driver */
static const struct usb_device_id id_table[] = {
	{ USB_DEVICE(VENDOR_ID, PRODUCT_ID) },
	{ },
};
MODULE_DEVICE_TABLE(usb, id_table);

/* the different text display modes the device is capable of */
static const char *display_textmodes[] = {"raw", "hex", "ascii"};

struct usb_sevsegdev {
	struct usb_device *udev;
	struct usb_interface *intf;

	u8 powered;
	u8 mode_msb;
	u8 mode_lsb;
	u8 decimals[MAXLEN];
	u8 textmode;
	u8 text[MAXLEN];
	u16 textlength;

	u8 shadow_power; /* for PM */
	u8 has_interface_pm;
};

/* sysfs_streq can't replace this completely
 * If the device was in hex mode, and the user wanted a 0,
 * if str commands are used, we would assume the end of string
 * so mem commands are used.
 */
static inline size_t my_memlen(const char *buf, size_t count)
{
	if (count > 0 && buf[count-1] == '\n')
		return count - 1;
	else
		return count;
}

static void update_display_powered(struct usb_sevsegdev *mydev)
{
	int rc;

	if (mydev->powered && !mydev->has_interface_pm) {
		rc = usb_autopm_get_interface(mydev->intf);
		if (rc < 0)
			return;
		mydev->has_interface_pm = 1;
	}

	if (mydev->shadow_power != 1)
		return;

	rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48,
				  (80 * 0x100) + 10, /* (power mode) */
				  (0x00 * 0x100) + (mydev->powered ?
1 : 0), NULL, 0, 2000, GFP_KERNEL); if (rc < 0) dev_dbg(&mydev->udev->dev, "power retval = %d\n", rc); if (!mydev->powered && mydev->has_interface_pm) { usb_autopm_put_interface(mydev->intf); mydev->has_interface_pm = 0; } } static void update_display_mode(struct usb_sevsegdev *mydev) { int rc; if(mydev->shadow_power != 1) return; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (82 * 0x100) + 10, /* (set mode) */ (mydev->mode_msb * 0x100) + mydev->mode_lsb, NULL, 0, 2000, GFP_NOIO); if (rc < 0) dev_dbg(&mydev->udev->dev, "mode retval = %d\n", rc); } static void update_display_visual(struct usb_sevsegdev *mydev, gfp_t mf) { int rc; int i; unsigned char buffer[MAXLEN] = {0}; u8 decimals = 0; if(mydev->shadow_power != 1) return; /* The device is right to left, where as you write left to right */ for (i = 0; i < mydev->textlength; i++) buffer[i] = mydev->text[mydev->textlength-1-i]; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (85 * 0x100) + 10, /* (write text) */ (0 * 0x100) + mydev->textmode, /* mode */ &buffer, mydev->textlength, 2000, mf); if (rc < 0) dev_dbg(&mydev->udev->dev, "write retval = %d\n", rc); /* The device is right to left, where as you write left to right */ for (i = 0; i < sizeof(mydev->decimals); i++) decimals |= mydev->decimals[i] << i; rc = usb_control_msg_send(mydev->udev, 0, 0x12, 0x48, (86 * 0x100) + 10, /* (set decimal) */ (0 * 0x100) + decimals, /* decimals */ NULL, 0, 2000, mf); if (rc < 0) dev_dbg(&mydev->udev->dev, "decimal retval = %d\n", rc); } #define MYDEV_ATTR_SIMPLE_UNSIGNED(name, update_fcn) \ static ssize_t name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usb_sevsegdev *mydev = usb_get_intfdata(intf); \ \ return sprintf(buf, "%u\n", mydev->name); \ } \ \ static ssize_t name##_store(struct device *dev, \ struct device_attribute *attr, const char *buf, size_t count) \ { \ struct usb_interface *intf = to_usb_interface(dev); \ struct usb_sevsegdev *mydev = usb_get_intfdata(intf); \ \ mydev->name = simple_strtoul(buf, NULL, 10); \ update_fcn(mydev); \ \ return count; \ } \ static DEVICE_ATTR_RW(name); static ssize_t text_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); return sysfs_emit(buf, "%s\n", mydev->text); } static ssize_t text_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); size_t end = my_memlen(buf, count); if (end > sizeof(mydev->text)) return -EINVAL; memset(mydev->text, 0, sizeof(mydev->text)); mydev->textlength = end; if (end > 0) memcpy(mydev->text, buf, end); update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(text); static ssize_t decimals_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; int pos; for (i = 0; i < sizeof(mydev->decimals); i++) { pos = sizeof(mydev->decimals) - 1 - i; if (mydev->decimals[i] == 0) buf[pos] = '0'; else if (mydev->decimals[i] == 1) buf[pos] = '1'; else buf[pos] = 'x'; } buf[sizeof(mydev->decimals)] = '\n'; return sizeof(mydev->decimals) + 1; } static ssize_t decimals_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = 
to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); size_t end = my_memlen(buf, count); int i; if (end > sizeof(mydev->decimals)) return -EINVAL; for (i = 0; i < end; i++) if (buf[i] != '0' && buf[i] != '1') return -EINVAL; memset(mydev->decimals, 0, sizeof(mydev->decimals)); for (i = 0; i < end; i++) if (buf[i] == '1') mydev->decimals[end-1-i] = 1; update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(decimals); static ssize_t textmode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; buf[0] = 0; for (i = 0; i < ARRAY_SIZE(display_textmodes); i++) { if (mydev->textmode == i) { strcat(buf, " ["); strcat(buf, display_textmodes[i]); strcat(buf, "] "); } else { strcat(buf, " "); strcat(buf, display_textmodes[i]); strcat(buf, " "); } } strcat(buf, "\n"); return strlen(buf); } static ssize_t textmode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct usb_interface *intf = to_usb_interface(dev); struct usb_sevsegdev *mydev = usb_get_intfdata(intf); int i; i = sysfs_match_string(display_textmodes, buf); if (i < 0) return i; mydev->textmode = i; update_display_visual(mydev, GFP_KERNEL); return count; } static DEVICE_ATTR_RW(textmode); MYDEV_ATTR_SIMPLE_UNSIGNED(powered, update_display_powered); MYDEV_ATTR_SIMPLE_UNSIGNED(mode_msb, update_display_mode); MYDEV_ATTR_SIMPLE_UNSIGNED(mode_lsb, update_display_mode); static struct attribute *sevseg_attrs[] = { &dev_attr_powered.attr, &dev_attr_text.attr, &dev_attr_textmode.attr, &dev_attr_decimals.attr, &dev_attr_mode_msb.attr, &dev_attr_mode_lsb.attr, NULL }; ATTRIBUTE_GROUPS(sevseg); static int sevseg_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_sevsegdev *mydev; int rc = -ENOMEM; mydev = kzalloc(sizeof(struct usb_sevsegdev), GFP_KERNEL); if (!mydev) goto error_mem; mydev->udev = usb_get_dev(udev); mydev->intf = interface; usb_set_intfdata(interface, mydev); /* PM */ mydev->shadow_power = 1; /* currently active */ mydev->has_interface_pm = 0; /* have not issued autopm_get */ /*set defaults */ mydev->textmode = 0x02; /* ascii mode */ mydev->mode_msb = 0x06; /* 6 characters */ mydev->mode_lsb = 0x3f; /* scanmode for 6 chars */ dev_info(&interface->dev, "USB 7 Segment device now attached\n"); return 0; error_mem: return rc; } static void sevseg_disconnect(struct usb_interface *interface) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(interface); usb_set_intfdata(interface, NULL); usb_put_dev(mydev->udev); kfree(mydev); dev_info(&interface->dev, "USB 7 Segment now disconnected\n"); } static int sevseg_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 0; return 0; } static int sevseg_resume(struct usb_interface *intf) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 1; update_display_mode(mydev); update_display_visual(mydev, GFP_NOIO); return 0; } static int sevseg_reset_resume(struct usb_interface *intf) { struct usb_sevsegdev *mydev; mydev = usb_get_intfdata(intf); mydev->shadow_power = 1; update_display_mode(mydev); update_display_visual(mydev, GFP_NOIO); return 0; } static struct usb_driver sevseg_driver = { .name = "usbsevseg", .probe = sevseg_probe, .disconnect = sevseg_disconnect, .suspend = 
sevseg_suspend,
	.resume =		sevseg_resume,
	.reset_resume =		sevseg_reset_resume,
	.id_table =		id_table,
	.dev_groups =		sevseg_groups,
	.supports_autosuspend =	1,
};

module_usb_driver(sevseg_driver);

MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
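/*
 * Editor's note -- illustrative sketch, not part of the driver above.  The
 * display is controlled entirely through the sysfs attributes registered via
 * dev_groups (powered, textmode, text, decimals, mode_msb, mode_lsb), so a
 * minimal userspace helper just writes those files.  The sysfs path is a
 * placeholder and depends on where the interface is bound.
 */
#include <stdio.h>

#define SEVSEG_SYSFS "/sys/bus/usb/devices/1-1:1.0"	/* placeholder path */

static int sevseg_write(const char *attr, const char *val)
{
	char path[256];
	FILE *f;
	int ret;

	snprintf(path, sizeof(path), SEVSEG_SYSFS "/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	ret = (fputs(val, f) < 0) ? -1 : 0;
	if (fclose(f) != 0)
		ret = -1;
	return ret;
}

int main(void)
{
	sevseg_write("powered", "1");		/* turn the display on */
	sevseg_write("textmode", "ascii");	/* one of: raw, hex, ascii */
	sevseg_write("text", "123456");		/* up to MAXLEN (8) characters */
	return 0;
}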
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Roccat Kone driver for Linux
 *
 * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net>
 */

/*
 */

/*
 * Roccat Kone is a gamer mouse which consists of a mouse part and a keyboard
 * part. The keyboard part enables the mouse to execute stored macros with mixed
 * key- and button-events.
 *
 * TODO implement on-the-fly polling-rate change
 *      The windows driver has the ability to change the polling rate of the
 *      device on the press of a mousebutton.
 *      Is it possible to remove and reinstall the urb in raw-event- or any
 *      other handler, or to defer this action to be executed somewhere else?
 *
 * TODO is it possible to overwrite group for sysfs attributes via udev?
 */

#include <linux/device.h>
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/hid-roccat.h>
#include "hid-ids.h"
#include "hid-roccat-common.h"
#include "hid-roccat-kone.h"

static uint profile_numbers[5] = {0, 1, 2, 3, 4};

static void kone_profile_activated(struct kone_device *kone, uint new_profile)
{
	kone->actual_profile = new_profile;
	kone->actual_dpi = kone->profiles[new_profile - 1].startup_dpi;
}

static void kone_profile_report(struct kone_device *kone, uint new_profile)
{
	struct kone_roccat_report roccat_report;

	roccat_report.event = kone_mouse_event_switch_profile;
	roccat_report.value = new_profile;
	roccat_report.key = 0;
	roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report);
}

static int kone_receive(struct usb_device *usb_dev, uint usb_command,
		void *data, uint size)
{
	char *buf;
	int len;

	buf = kmalloc(size, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0),
			HID_REQ_GET_REPORT,
			USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
			usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT);

	memcpy(data, buf, size);
	kfree(buf);

	return ((len < 0) ? len : ((len != size) ? -EIO : 0));
}

static int kone_send(struct usb_device *usb_dev, uint usb_command,
		void const *data, uint size)
{
	char *buf;
	int len;

	buf = kmemdup(data, size, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
			HID_REQ_SET_REPORT,
			USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
			usb_command, 0, buf, size, USB_CTRL_SET_TIMEOUT);

	kfree(buf);

	return ((len < 0) ? len : ((len != size) ? -EIO : 0));
}

static void kone_set_settings_checksum(struct kone_settings *settings)
{
	uint16_t checksum = 0;
	unsigned char *address = (unsigned char *)settings;
	int i;

	for (i = 0; i < sizeof(struct kone_settings) - 2; ++i, ++address)
		checksum += *address;
	settings->checksum = cpu_to_le16(checksum);
}

/*
 * Checks success after writing data to mouse
 * On success returns 0
 * On failure returns errno
 */
static int kone_check_write(struct usb_device *usb_dev)
{
	int retval;
	uint8_t data;

	do {
		/*
		 * Mouse needs 50 msecs until it says ok, but there are
		 * 30 more msecs needed for next write to work.
		 */
		msleep(80);

		retval = kone_receive(usb_dev,
				kone_command_confirm_write, &data, 1);
		if (retval)
			return retval;

		/*
		 * value of 3 seems to mean something like
		 * "not finished yet, but it looks good"
		 * So check again after a moment.
*/ } while (data == 3); if (data == 1) /* everything alright */ return 0; /* unknown answer */ dev_err(&usb_dev->dev, "got retval %d when checking write\n", data); return -EIO; } /* * Reads settings from mouse and stores it in @buf * On success returns 0 * On failure returns errno */ static int kone_get_settings(struct usb_device *usb_dev, struct kone_settings *buf) { return kone_receive(usb_dev, kone_command_settings, buf, sizeof(struct kone_settings)); } /* * Writes settings from @buf to mouse * On success returns 0 * On failure returns errno */ static int kone_set_settings(struct usb_device *usb_dev, struct kone_settings const *settings) { int retval; retval = kone_send(usb_dev, kone_command_settings, settings, sizeof(struct kone_settings)); if (retval) return retval; return kone_check_write(usb_dev); } /* * Reads profile data from mouse and stores it in @buf * @number: profile number to read * On success returns 0 * On failure returns errno */ static int kone_get_profile(struct usb_device *usb_dev, struct kone_profile *buf, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), USB_REQ_CLEAR_FEATURE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, kone_command_profile, number, buf, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return -EIO; return 0; } /* * Writes profile data to mouse. * @number: profile number to write * On success returns 0 * On failure returns errno */ static int kone_set_profile(struct usb_device *usb_dev, struct kone_profile const *profile, int number) { int len; if (number < 1 || number > 5) return -EINVAL; len = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), USB_REQ_SET_CONFIGURATION, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT, kone_command_profile, number, (void *)profile, sizeof(struct kone_profile), USB_CTRL_SET_TIMEOUT); if (len != sizeof(struct kone_profile)) return len; if (kone_check_write(usb_dev)) return -EIO; return 0; } /* * Reads value of "fast-clip-weight" and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_weight(struct usb_device *usb_dev, int *result) { int retval; uint8_t data; retval = kone_receive(usb_dev, kone_command_weight, &data, 1); if (retval) return retval; *result = (int)data; return 0; } /* * Reads firmware_version of mouse and stores it in @result * On success returns 0 * On failure returns errno */ static int kone_get_firmware_version(struct usb_device *usb_dev, int *result) { int retval; uint16_t data; retval = kone_receive(usb_dev, kone_command_firmware_version, &data, 2); if (retval) return retval; *result = le16_to_cpu(data); return 0; } static ssize_t kone_sysfs_read_settings(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_settings)) return 0; if (off + count > sizeof(struct kone_settings)) count = sizeof(struct kone_settings) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->settings) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* * Writing settings automatically activates startup_profile. 
* This function keeps values in kone_device up to date and assumes that in * case of error the old data is still valid */ static ssize_t kone_sysfs_write_settings(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval = 0, difference, old_profile; struct kone_settings *settings = (struct kone_settings *)buf; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_settings)) return -EINVAL; mutex_lock(&kone->kone_lock); difference = memcmp(settings, &kone->settings, sizeof(struct kone_settings)); if (difference) { if (settings->startup_profile < 1 || settings->startup_profile > 5) { retval = -EINVAL; goto unlock; } retval = kone_set_settings(usb_dev, settings); if (retval) goto unlock; old_profile = kone->settings.startup_profile; memcpy(&kone->settings, settings, sizeof(struct kone_settings)); kone_profile_activated(kone, kone->settings.startup_profile); if (kone->settings.startup_profile != old_profile) kone_profile_report(kone, kone->settings.startup_profile); } unlock: mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_settings); } static const BIN_ATTR(settings, 0660, kone_sysfs_read_settings, kone_sysfs_write_settings, sizeof(struct kone_settings)); static ssize_t kone_sysfs_read_profilex(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); if (off >= sizeof(struct kone_profile)) return 0; if (off + count > sizeof(struct kone_profile)) count = sizeof(struct kone_profile) - off; mutex_lock(&kone->kone_lock); memcpy(buf, ((char const *)&kone->profiles[*(uint *)(attr->private)]) + off, count); mutex_unlock(&kone->kone_lock); return count; } /* Writes data only if different to stored data */ static ssize_t kone_sysfs_write_profilex(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); struct kone_profile *profile; int retval = 0, difference; /* I need to get my data in one piece */ if (off != 0 || count != sizeof(struct kone_profile)) return -EINVAL; profile = &kone->profiles[*(uint *)(attr->private)]; mutex_lock(&kone->kone_lock); difference = memcmp(buf, profile, sizeof(struct kone_profile)); if (difference) { retval = kone_set_profile(usb_dev, (struct kone_profile const *)buf, *(uint *)(attr->private) + 1); if (!retval) memcpy(profile, buf, sizeof(struct kone_profile)); } mutex_unlock(&kone->kone_lock); if (retval) return retval; return sizeof(struct kone_profile); } #define PROFILE_ATTR(number) \ static const struct bin_attribute bin_attr_profile##number = { \ .attr = { .name = "profile" #number, .mode = 0660 }, \ .size = sizeof(struct kone_profile), \ .read_new = kone_sysfs_read_profilex, \ .write_new = kone_sysfs_write_profilex, \ .private = &profile_numbers[number-1], \ } PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t kone_sysfs_show_actual_profile(struct device *dev, struct 
device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->actual_profile); } static DEVICE_ATTR(actual_profile, 0440, kone_sysfs_show_actual_profile, NULL); static ssize_t kone_sysfs_show_actual_dpi(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->actual_dpi); } static DEVICE_ATTR(actual_dpi, 0440, kone_sysfs_show_actual_dpi, NULL); /* weight is read each time, since we don't get informed when it's changed */ static ssize_t kone_sysfs_show_weight(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone; struct usb_device *usb_dev; int weight = 0; int retval; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&kone->kone_lock); retval = kone_get_weight(usb_dev, &weight); mutex_unlock(&kone->kone_lock); if (retval) return retval; return sysfs_emit(buf, "%d\n", weight); } static DEVICE_ATTR(weight, 0440, kone_sysfs_show_weight, NULL); static ssize_t kone_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->firmware_version); } static DEVICE_ATTR(firmware_version, 0440, kone_sysfs_show_firmware_version, NULL); static ssize_t kone_sysfs_show_tcu(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->settings.tcu); } static int kone_tcu_command(struct usb_device *usb_dev, int number) { unsigned char value; value = number; return kone_send(usb_dev, kone_command_calibrate, &value, 1); } /* * Calibrating the tcu is the only action that changes settings data inside the * mouse, so this data needs to be reread */ static ssize_t kone_sysfs_set_tcu(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long state; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &state); if (retval) return retval; if (state != 0 && state != 1) return -EINVAL; mutex_lock(&kone->kone_lock); if (state == 1) { /* state activate */ retval = kone_tcu_command(usb_dev, 1); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 2); if (retval) goto exit_unlock; ssleep(5); /* tcu needs this time for calibration */ retval = kone_tcu_command(usb_dev, 3); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 0); if (retval) goto exit_unlock; retval = kone_tcu_command(usb_dev, 4); if (retval) goto exit_unlock; /* * Kone needs this time to settle things. * Reading settings too early will result in invalid data. * Roccat's driver waits 1 sec, maybe this time could be * shortened. 
*/ ssleep(1); } /* calibration changes values in settings, so reread */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; /* only write settings back if activation state is different */ if (kone->settings.tcu != state) { kone->settings.tcu = state; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { dev_err(&usb_dev->dev, "couldn't set tcu state\n"); /* * try to reread valid settings into buffer overwriting * first error code */ retval = kone_get_settings(usb_dev, &kone->settings); if (retval) goto exit_no_settings; goto exit_unlock; } /* calibration resets profile */ kone_profile_activated(kone, kone->settings.startup_profile); } retval = size; exit_no_settings: dev_err(&usb_dev->dev, "couldn't read settings\n"); exit_unlock: mutex_unlock(&kone->kone_lock); return retval; } static DEVICE_ATTR(tcu, 0660, kone_sysfs_show_tcu, kone_sysfs_set_tcu); static ssize_t kone_sysfs_show_startup_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct kone_device *kone = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", kone->settings.startup_profile); } static ssize_t kone_sysfs_set_startup_profile(struct device *dev, struct device_attribute *attr, char const *buf, size_t size) { struct kone_device *kone; struct usb_device *usb_dev; int retval; unsigned long new_startup_profile; dev = dev->parent->parent; kone = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &new_startup_profile); if (retval) return retval; if (new_startup_profile < 1 || new_startup_profile > 5) return -EINVAL; mutex_lock(&kone->kone_lock); kone->settings.startup_profile = new_startup_profile; kone_set_settings_checksum(&kone->settings); retval = kone_set_settings(usb_dev, &kone->settings); if (retval) { mutex_unlock(&kone->kone_lock); return retval; } /* changing the startup profile immediately activates this profile */ kone_profile_activated(kone, new_startup_profile); kone_profile_report(kone, new_startup_profile); mutex_unlock(&kone->kone_lock); return size; } static DEVICE_ATTR(startup_profile, 0660, kone_sysfs_show_startup_profile, kone_sysfs_set_startup_profile); static struct attribute *kone_attrs[] = { /* * Read actual dpi settings. * Returns raw value for further processing. Refer to enum * kone_polling_rates to get real value. */ &dev_attr_actual_dpi.attr, &dev_attr_actual_profile.attr, /* * The mouse can be equipped with one of four supplied weights from 5 * to 20 grams which are recognized and its value can be read out. * This returns the raw value reported by the mouse for easy evaluation * by software. Refer to enum kone_weights to get corresponding real * weight. */ &dev_attr_weight.attr, /* * Prints firmware version stored in mouse as integer. * The raw value reported by the mouse is returned for easy evaluation, * to get the real version number the decimal point has to be shifted 2 * positions to the left. E.g. a value of 138 means 1.38. */ &dev_attr_firmware_version.attr, /* * Prints state of Tracking Control Unit as number where 0 = off and * 1 = on. 
Writing 0 deactivates tcu and writing 1 calibrates and * activates the tcu */ &dev_attr_tcu.attr, /* Prints and takes the number of the profile the mouse starts with */ &dev_attr_startup_profile.attr, NULL, }; static const struct bin_attribute *const kone_bin_attributes[] = { &bin_attr_settings, &bin_attr_profile1, &bin_attr_profile2, &bin_attr_profile3, &bin_attr_profile4, &bin_attr_profile5, NULL, }; static const struct attribute_group kone_group = { .attrs = kone_attrs, .bin_attrs_new = kone_bin_attributes, }; static const struct attribute_group *kone_groups[] = { &kone_group, NULL, }; /* kone_class is used for creating sysfs attributes via roccat char device */ static const struct class kone_class = { .name = "kone", .dev_groups = kone_groups, }; static int kone_init_kone_device_struct(struct usb_device *usb_dev, struct kone_device *kone) { uint i; int retval; mutex_init(&kone->kone_lock); for (i = 0; i < 5; ++i) { retval = kone_get_profile(usb_dev, &kone->profiles[i], i + 1); if (retval) return retval; } retval = kone_get_settings(usb_dev, &kone->settings); if (retval) return retval; retval = kone_get_firmware_version(usb_dev, &kone->firmware_version); if (retval) return retval; kone_profile_activated(kone, kone->settings.startup_profile); return 0; } /* * Since IGNORE_MOUSE quirk moved to hid-apple, there is no way to bind only to * mousepart if usb_hid is compiled into the kernel and kone is compiled as * module. * Secial behaviour is bound only to mousepart since only mouseevents contain * additional notifications. */ static int kone_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct kone_device *kone; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = kzalloc(sizeof(*kone), GFP_KERNEL); if (!kone) return -ENOMEM; hid_set_drvdata(hdev, kone); retval = kone_init_kone_device_struct(usb_dev, kone); if (retval) { hid_err(hdev, "couldn't init struct kone_device\n"); goto exit_free; } retval = roccat_connect(&kone_class, hdev, sizeof(struct kone_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); /* be tolerant about not getting chrdev */ } else { kone->roccat_claimed = 1; kone->chrdev_minor = retval; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(kone); return retval; } static void kone_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct kone_device *kone; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { kone = hid_get_drvdata(hdev); if (kone->roccat_claimed) roccat_disconnect(kone->chrdev_minor); kfree(hid_get_drvdata(hdev)); } } static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = kone_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void kone_remove(struct hid_device *hdev) { kone_remove_specials(hdev); hid_hw_stop(hdev); } /* handle special events and keep actual profile and dpi values up to date */ static void kone_keep_values_up_to_date(struct kone_device *kone, struct 
kone_mouse_event const *event) { switch (event->event) { case kone_mouse_event_switch_profile: kone->actual_dpi = kone->profiles[event->value - 1]. startup_dpi; fallthrough; case kone_mouse_event_osd_profile: kone->actual_profile = event->value; break; case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_dpi: kone->actual_dpi = event->value; break; } } static void kone_report_to_chrdev(struct kone_device const *kone, struct kone_mouse_event const *event) { struct kone_roccat_report roccat_report; switch (event->event) { case kone_mouse_event_switch_profile: case kone_mouse_event_switch_dpi: case kone_mouse_event_osd_profile: case kone_mouse_event_osd_dpi: roccat_report.event = event->event; roccat_report.value = event->value; roccat_report.key = 0; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); break; case kone_mouse_event_call_overlong_macro: case kone_mouse_event_multimedia: if (event->value == kone_keystroke_action_press) { roccat_report.event = event->event; roccat_report.value = kone->actual_profile; roccat_report.key = event->macro_key; roccat_report_event(kone->chrdev_minor, (uint8_t *)&roccat_report); } break; } } /* * Is called for keyboard- and mousepart. * Only mousepart gets informations about special events in its extended event * structure. */ static int kone_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct kone_device *kone = hid_get_drvdata(hdev); struct kone_mouse_event *event = (struct kone_mouse_event *)data; /* keyboard events are always processed by default handler */ if (size != sizeof(struct kone_mouse_event)) return 0; if (kone == NULL) return 0; /* * Firmware 1.38 introduced new behaviour for tilt and special buttons. * Pressed button is reported in each movement event. * Workaround sends only one event per press. */ if (memcmp(&kone->last_mouse_event.tilt, &event->tilt, 5)) memcpy(&kone->last_mouse_event, event, sizeof(struct kone_mouse_event)); else memset(&event->wipe, 0, sizeof(event->wipe)); kone_keep_values_up_to_date(kone, event); if (kone->roccat_claimed) kone_report_to_chrdev(kone, event); return 0; /* always do further processing */ } static const struct hid_device_id kone_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) }, { } }; MODULE_DEVICE_TABLE(hid, kone_devices); static struct hid_driver kone_driver = { .name = "kone", .id_table = kone_devices, .probe = kone_probe, .remove = kone_remove, .raw_event = kone_raw_event }; static int __init kone_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&kone_class); if (retval) return retval; retval = hid_register_driver(&kone_driver); if (retval) class_unregister(&kone_class); return retval; } static void __exit kone_exit(void) { hid_unregister_driver(&kone_driver); class_unregister(&kone_class); } module_init(kone_init); module_exit(kone_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kone driver"); MODULE_LICENSE("GPL v2"); |
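/*
 * Illustrative user-space sketch (not part of the driver above): the
 * attributes registered through kone_class/kone_groups appear as plain-text
 * files in sysfs and can be read with ordinary file I/O. The
 * "/sys/class/kone/kone0/..." paths below are assumptions about where the
 * roccat char device ends up being enumerated; adjust them to the actual
 * device node on a given system.
 */
#include <stdio.h>

/* Read a single sysfs attribute and print its raw integer value. */
static int read_attr(const char *path)
{
	FILE *f = fopen(path, "r");
	int value;

	if (!f) {
		perror(path);
		return -1;
	}
	if (fscanf(f, "%d", &value) == 1)
		printf("%s = %d\n", path, value);
	fclose(f);
	return 0;
}

int main(void)
{
	/*
	 * All values are raw, as documented in kone_attrs above; e.g. a
	 * firmware_version of 138 means firmware 1.38, and weight/actual_dpi
	 * map to enum kone_weights / enum kone_polling_rates respectively.
	 */
	read_attr("/sys/class/kone/kone0/actual_profile");
	read_attr("/sys/class/kone/kone0/actual_dpi");
	read_attr("/sys/class/kone/kone0/firmware_version");
	read_attr("/sys/class/kone/kone0/weight");
	return 0;
}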
// SPDX-License-Identifier: GPL-2.0
/*
 * Neil Brown <neilb@cse.unsw.edu.au>
 * J. Bruce Fields <bfields@umich.edu>
 * Andy Adamson <andros@umich.edu>
 * Dug Song <dugsong@monkey.org>
 *
 * RPCSEC_GSS server authentication.
 * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
 * (gssapi).
 *
 * RPCSEC_GSS involves three stages:
 *  1/ context creation
 *  2/ data exchange
 *  3/ context destruction
 *
 * Context creation is handled largely by upcalls to user-space.
 *  In particular, GSS_Accept_sec_context is handled by an upcall.
 * Data exchange is handled entirely within the kernel.
 *  In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
 * Context destruction is handled in-kernel.
 *  GSS_Delete_sec_context is in-kernel.
 *
 * Context creation is initiated by an RPCSEC_GSS_INIT request arriving.
 * The context handle and gss_token are used as a key into the rpcsec_init cache.
 * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
 * being major_status, minor_status, context_handle, reply_token.
 * These are sent back to the client.
* Sequence window management is handled by the kernel. The window size if currently * a compile time constant. * * When user-space is happy that a context is established, it places an entry * in the rpcsec_context cache. The key for this cache is the context_handle. * The content includes: * uid/gidlist - for determining access rights * mechanism type * mechanism specific information, such as a key * */ #include <linux/slab.h> #include <linux/types.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/user_namespace.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/gss_krb5.h> #include <trace/events/rpcgss.h> #include "gss_rpc_upcall.h" /* * Unfortunately there isn't a maximum checksum size exported via the * GSS API. Manufacture one based on GSS mechanisms supported by this * implementation. */ #define GSS_MAX_CKSUMSIZE (GSS_KRB5_TOK_HDR_LEN + GSS_KRB5_MAX_CKSUM_LEN) /* * This value may be increased in the future to accommodate other * usage of the scratch buffer. */ #define GSS_SCRATCH_SIZE GSS_MAX_CKSUMSIZE struct gss_svc_data { /* decoded gss client cred: */ struct rpc_gss_wire_cred clcred; u32 gsd_databody_offset; struct rsc *rsci; /* for temporary results */ __be32 gsd_seq_num; u8 gsd_scratch[GSS_SCRATCH_SIZE]; }; /* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests * into replies. * * Key is context handle (\x if empty) and gss_token. * Content is major_status minor_status (integers) context_handle, reply_token. * */ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) { return a->len == b->len && 0 == memcmp(a->data, b->data, a->len); } #define RSI_HASHBITS 6 #define RSI_HASHMAX (1<<RSI_HASHBITS) struct rsi { struct cache_head h; struct xdr_netobj in_handle, in_token; struct xdr_netobj out_handle, out_token; int major_status, minor_status; struct rcu_head rcu_head; }; static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old); static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item); static void rsi_free(struct rsi *rsii) { kfree(rsii->in_handle.data); kfree(rsii->in_token.data); kfree(rsii->out_handle.data); kfree(rsii->out_token.data); } static void rsi_free_rcu(struct rcu_head *head) { struct rsi *rsii = container_of(head, struct rsi, rcu_head); rsi_free(rsii); kfree(rsii); } static void rsi_put(struct kref *ref) { struct rsi *rsii = container_of(ref, struct rsi, h.ref); call_rcu(&rsii->rcu_head, rsi_free_rcu); } static inline int rsi_hash(struct rsi *item) { return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS) ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS); } static int rsi_match(struct cache_head *a, struct cache_head *b) { struct rsi *item = container_of(a, struct rsi, h); struct rsi *tmp = container_of(b, struct rsi, h); return netobj_equal(&item->in_handle, &tmp->in_handle) && netobj_equal(&item->in_token, &tmp->in_token); } static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) { dst->len = len; dst->data = (len ? 
kmemdup(src, len, GFP_KERNEL) : NULL); if (len && !dst->data) return -ENOMEM; return 0; } static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src) { return dup_to_netobj(dst, src->data, src->len); } static void rsi_init(struct cache_head *cnew, struct cache_head *citem) { struct rsi *new = container_of(cnew, struct rsi, h); struct rsi *item = container_of(citem, struct rsi, h); new->out_handle.data = NULL; new->out_handle.len = 0; new->out_token.data = NULL; new->out_token.len = 0; new->in_handle.len = item->in_handle.len; item->in_handle.len = 0; new->in_token.len = item->in_token.len; item->in_token.len = 0; new->in_handle.data = item->in_handle.data; item->in_handle.data = NULL; new->in_token.data = item->in_token.data; item->in_token.data = NULL; } static void update_rsi(struct cache_head *cnew, struct cache_head *citem) { struct rsi *new = container_of(cnew, struct rsi, h); struct rsi *item = container_of(citem, struct rsi, h); BUG_ON(new->out_handle.data || new->out_token.data); new->out_handle.len = item->out_handle.len; item->out_handle.len = 0; new->out_token.len = item->out_token.len; item->out_token.len = 0; new->out_handle.data = item->out_handle.data; item->out_handle.data = NULL; new->out_token.data = item->out_token.data; item->out_token.data = NULL; new->major_status = item->major_status; new->minor_status = item->minor_status; } static struct cache_head *rsi_alloc(void) { struct rsi *rsii = kmalloc(sizeof(*rsii), GFP_KERNEL); if (rsii) return &rsii->h; else return NULL; } static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) { return sunrpc_cache_pipe_upcall_timeout(cd, h); } static void rsi_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) { struct rsi *rsii = container_of(h, struct rsi, h); qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); (*bpp)[-1] = '\n'; WARN_ONCE(*blen < 0, "RPCSEC/GSS credential too large - please use gssproxy\n"); } static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { /* context token expiry major minor context token */ char *buf = mesg; char *ep; int len; struct rsi rsii, *rsip = NULL; time64_t expiry; int status = -EINVAL; memset(&rsii, 0, sizeof(rsii)); /* handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; if (dup_to_netobj(&rsii.in_handle, buf, len)) goto out; /* token */ len = qword_get(&mesg, buf, mlen); status = -EINVAL; if (len < 0) goto out; status = -ENOMEM; if (dup_to_netobj(&rsii.in_token, buf, len)) goto out; rsip = rsi_lookup(cd, &rsii); if (!rsip) goto out; rsii.h.flags = 0; /* expiry */ status = get_expiry(&mesg, &expiry); if (status) goto out; status = -EINVAL; /* major/minor */ len = qword_get(&mesg, buf, mlen); if (len <= 0) goto out; rsii.major_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; len = qword_get(&mesg, buf, mlen); if (len <= 0) goto out; rsii.minor_status = simple_strtoul(buf, &ep, 10); if (*ep) goto out; /* out_handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; if (dup_to_netobj(&rsii.out_handle, buf, len)) goto out; /* out_token */ len = qword_get(&mesg, buf, mlen); status = -EINVAL; if (len < 0) goto out; status = -ENOMEM; if (dup_to_netobj(&rsii.out_token, buf, len)) goto out; rsii.h.expiry_time = expiry; rsip = rsi_update(cd, &rsii, rsip); status = 0; out: rsi_free(&rsii); if (rsip) cache_put(&rsip->h, cd); else status = -ENOMEM; return status; } static const struct 
cache_detail rsi_cache_template = { .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, .cache_upcall = rsi_upcall, .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, .update = update_rsi, .alloc = rsi_alloc, }; static struct rsi *rsi_lookup(struct cache_detail *cd, struct rsi *item) { struct cache_head *ch; int hash = rsi_hash(item); ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash); if (ch) return container_of(ch, struct rsi, h); else return NULL; } static struct rsi *rsi_update(struct cache_detail *cd, struct rsi *new, struct rsi *old) { struct cache_head *ch; int hash = rsi_hash(new); ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsi, h); else return NULL; } /* * The rpcsec_context cache is used to store a context that is * used in data exchange. * The key is a context handle. The content is: * uid, gidlist, mechanism, service-set, mech-specific-data */ #define RSC_HASHBITS 10 #define RSC_HASHMAX (1<<RSC_HASHBITS) #define GSS_SEQ_WIN 128 struct gss_svc_seq_data { /* highest seq number seen so far: */ u32 sd_max; /* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, the i-th bit of * sd_win is nonzero iff sequence number i has been seen already: */ unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG]; spinlock_t sd_lock; }; struct rsc { struct cache_head h; struct xdr_netobj handle; struct svc_cred cred; struct gss_svc_seq_data seqdata; struct gss_ctx *mechctx; struct rcu_head rcu_head; }; static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old); static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item); static void rsc_free(struct rsc *rsci) { kfree(rsci->handle.data); if (rsci->mechctx) gss_delete_sec_context(&rsci->mechctx); free_svc_cred(&rsci->cred); } static void rsc_free_rcu(struct rcu_head *head) { struct rsc *rsci = container_of(head, struct rsc, rcu_head); kfree(rsci->handle.data); kfree(rsci); } static void rsc_put(struct kref *ref) { struct rsc *rsci = container_of(ref, struct rsc, h.ref); if (rsci->mechctx) gss_delete_sec_context(&rsci->mechctx); free_svc_cred(&rsci->cred); call_rcu(&rsci->rcu_head, rsc_free_rcu); } static inline int rsc_hash(struct rsc *rsci) { return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS); } static int rsc_match(struct cache_head *a, struct cache_head *b) { struct rsc *new = container_of(a, struct rsc, h); struct rsc *tmp = container_of(b, struct rsc, h); return netobj_equal(&new->handle, &tmp->handle); } static void rsc_init(struct cache_head *cnew, struct cache_head *ctmp) { struct rsc *new = container_of(cnew, struct rsc, h); struct rsc *tmp = container_of(ctmp, struct rsc, h); new->handle.len = tmp->handle.len; tmp->handle.len = 0; new->handle.data = tmp->handle.data; tmp->handle.data = NULL; new->mechctx = NULL; init_svc_cred(&new->cred); } static void update_rsc(struct cache_head *cnew, struct cache_head *ctmp) { struct rsc *new = container_of(cnew, struct rsc, h); struct rsc *tmp = container_of(ctmp, struct rsc, h); new->mechctx = tmp->mechctx; tmp->mechctx = NULL; memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; init_svc_cred(&tmp->cred); } static struct cache_head * rsc_alloc(void) { struct rsc *rsci = kmalloc(sizeof(*rsci), GFP_KERNEL); if (rsci) return &rsci->h; else return NULL; } static int rsc_upcall(struct cache_detail *cd, struct cache_head *h) { return -EINVAL; } static int 
rsc_parse(struct cache_detail *cd, char *mesg, int mlen) { /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */ char *buf = mesg; int id; int len, rv; struct rsc rsci, *rscp = NULL; time64_t expiry; int status = -EINVAL; struct gss_api_mech *gm = NULL; memset(&rsci, 0, sizeof(rsci)); /* context handle */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = -ENOMEM; if (dup_to_netobj(&rsci.handle, buf, len)) goto out; rsci.h.flags = 0; /* expiry */ status = get_expiry(&mesg, &expiry); if (status) goto out; status = -EINVAL; rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; /* uid, or NEGATIVE */ rv = get_int(&mesg, &id); if (rv == -EINVAL) goto out; if (rv == -ENOENT) set_bit(CACHE_NEGATIVE, &rsci.h.flags); else { int N, i; /* * NOTE: we skip uid_valid()/gid_valid() checks here: * instead, * -1 id's are later mapped to the * (export-specific) anonymous id by nfsd_setuser. * * (But supplementary gid's get no such special * treatment so are checked for validity here.) */ /* uid */ rsci.cred.cr_uid = make_kuid(current_user_ns(), id); /* gid */ if (get_int(&mesg, &id)) goto out; rsci.cred.cr_gid = make_kgid(current_user_ns(), id); /* number of additional gid's */ if (get_int(&mesg, &N)) goto out; if (N < 0 || N > NGROUPS_MAX) goto out; status = -ENOMEM; rsci.cred.cr_group_info = groups_alloc(N); if (rsci.cred.cr_group_info == NULL) goto out; /* gid's */ status = -EINVAL; for (i=0; i<N; i++) { kgid_t kgid; if (get_int(&mesg, &id)) goto out; kgid = make_kgid(current_user_ns(), id); if (!gid_valid(kgid)) goto out; rsci.cred.cr_group_info->gid[i] = kgid; } groups_sort(rsci.cred.cr_group_info); /* mech name */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf); status = -EOPNOTSUPP; if (!gm) goto out; status = -EINVAL; /* mech-specific data: */ len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, NULL, GFP_KERNEL); if (status) goto out; /* get client name */ len = qword_get(&mesg, buf, mlen); if (len > 0) { rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL); if (!rsci.cred.cr_principal) { status = -ENOMEM; goto out; } } } rsci.h.expiry_time = expiry; rscp = rsc_update(cd, &rsci, rscp); status = 0; out: rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); else status = -ENOMEM; return status; } static const struct cache_detail rsc_cache_template = { .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .name = "auth.rpcsec.context", .cache_put = rsc_put, .cache_upcall = rsc_upcall, .cache_parse = rsc_parse, .match = rsc_match, .init = rsc_init, .update = update_rsc, .alloc = rsc_alloc, }; static struct rsc *rsc_lookup(struct cache_detail *cd, struct rsc *item) { struct cache_head *ch; int hash = rsc_hash(item); ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash); if (ch) return container_of(ch, struct rsc, h); else return NULL; } static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old) { struct cache_head *ch; int hash = rsc_hash(new); ch = sunrpc_cache_update(cd, &new->h, &old->h, hash); if (ch) return container_of(ch, struct rsc, h); else return NULL; } static struct rsc * gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) { struct rsc rsci; struct rsc *found; memset(&rsci, 0, sizeof(rsci)); if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) return NULL; found = rsc_lookup(cd, &rsci); rsc_free(&rsci); if (!found) return NULL; if (cache_check(cd, &found->h, NULL)) return NULL; return 
found; } /** * gss_check_seq_num - GSS sequence number window check * @rqstp: RPC Call to use when reporting errors * @rsci: cached GSS context state (updated on return) * @seq_num: sequence number to check * * Implements sequence number algorithm as specified in * RFC 2203, Section 5.3.3.1. "Context Management". * * Return values: * %true: @rqstp's GSS sequence number is inside the window * %false: @rqstp's GSS sequence number is outside the window */ static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci, u32 seq_num) { struct gss_svc_seq_data *sd = &rsci->seqdata; bool result = false; spin_lock(&sd->sd_lock); if (seq_num > sd->sd_max) { if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { memset(sd->sd_win, 0, sizeof(sd->sd_win)); sd->sd_max = seq_num; } else while (sd->sd_max < seq_num) { sd->sd_max++; __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win); } __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); goto ok; } else if (seq_num + GSS_SEQ_WIN <= sd->sd_max) { goto toolow; } if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) goto alreadyseen; ok: result = true; out: spin_unlock(&sd->sd_lock); return result; toolow: trace_rpcgss_svc_seqno_low(rqstp, seq_num, sd->sd_max - GSS_SEQ_WIN, sd->sd_max); goto out; alreadyseen: trace_rpcgss_svc_seqno_seen(rqstp, seq_num); goto out; } /* * Decode and verify a Call's verifier field. For RPC_AUTH_GSS Calls, * the body of this field contains a variable length checksum. * * GSS-specific auth_stat values are mandated by RFC 2203 Section * 5.3.3.3. */ static int svcauth_gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, __be32 *rpcstart, struct rpc_gss_wire_cred *gc) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct gss_ctx *ctx_id = rsci->mechctx; u32 flavor, maj_stat; struct xdr_buf rpchdr; struct xdr_netobj checksum; struct kvec iov; /* * Compute the checksum of the incoming Call from the * XID field to credential field: */ iov.iov_base = rpcstart; iov.iov_len = (u8 *)xdr->p - (u8 *)rpcstart; xdr_buf_from_iov(&iov, &rpchdr); /* Call's verf field: */ if (xdr_stream_decode_opaque_auth(xdr, &flavor, (void **)&checksum.data, &checksum.len) < 0) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } if (flavor != RPC_AUTH_GSS) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } if (rqstp->rq_deferred) return SVC_OK; maj_stat = gss_verify_mic(ctx_id, &rpchdr, &checksum); if (maj_stat != GSS_S_COMPLETE) { trace_rpcgss_svc_mic(rqstp, maj_stat); rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; return SVC_DENIED; } if (gc->gc_seq > MAXSEQ) { trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq); rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; return SVC_DENIED; } if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq)) return SVC_DROP; return SVC_OK; } /* * Construct and encode a Reply's verifier field. The verifier's body * field contains a variable-length checksum of the GSS sequence * number. 
*/ static bool svcauth_gss_encode_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { struct gss_svc_data *gsd = rqstp->rq_auth_data; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj checksum; struct kvec iov; gsd->gsd_seq_num = cpu_to_be32(seq); iov.iov_base = &gsd->gsd_seq_num; iov.iov_len = XDR_UNIT; xdr_buf_from_iov(&iov, &verf_data); checksum.data = gsd->gsd_scratch; maj_stat = gss_get_mic(ctx_id, &verf_data, &checksum); if (maj_stat != GSS_S_COMPLETE) goto bad_mic; return xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_GSS, checksum.data, checksum.len) > 0; bad_mic: trace_rpcgss_svc_get_mic(rqstp, maj_stat); return false; } struct gss_domain { struct auth_domain h; u32 pseudoflavor; }; static struct auth_domain * find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) { char *name; name = gss_service_to_auth_domain_name(ctx->mech_type, svc); if (!name) return NULL; return auth_domain_find(name); } static struct auth_ops svcauthops_gss; u32 svcauth_gss_flavor(struct auth_domain *dom) { struct gss_domain *gd = container_of(dom, struct gss_domain, h); return gd->pseudoflavor; } EXPORT_SYMBOL_GPL(svcauth_gss_flavor); struct auth_domain * svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { struct gss_domain *new; struct auth_domain *test; int stat = -ENOMEM; new = kmalloc(sizeof(*new), GFP_KERNEL); if (!new) goto out; kref_init(&new->h.ref); new->h.name = kstrdup(name, GFP_KERNEL); if (!new->h.name) goto out_free_dom; new->h.flavour = &svcauthops_gss; new->pseudoflavor = pseudoflavor; test = auth_domain_lookup(name, &new->h); if (test != &new->h) { pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n", name); stat = -EADDRINUSE; auth_domain_put(test); goto out_free_name; } return test; out_free_name: kfree(new->h.name); out_free_dom: kfree(new); out: return ERR_PTR(stat); } EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor); /* * RFC 2203, Section 5.3.2.2 * * struct rpc_gss_integ_data { * opaque databody_integ<>; * opaque checksum<>; * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; */ static noinline_for_stack int svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx) { struct gss_svc_data *gsd = rqstp->rq_auth_data; struct xdr_stream *xdr = &rqstp->rq_arg_stream; u32 len, offset, seq_num, maj_stat; struct xdr_buf *buf = xdr->buf; struct xdr_buf databody_integ; struct xdr_netobj checksum; /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) return 0; if (xdr_stream_decode_u32(xdr, &len) < 0) goto unwrap_failed; if (len & 3) goto unwrap_failed; offset = xdr_stream_pos(xdr); if (xdr_buf_subsegment(buf, &databody_integ, offset, len)) goto unwrap_failed; /* * The xdr_stream now points to the @seq_num field. The next * XDR data item is the @arg field, which contains the clear * text RPC program payload. The checksum, which follows the * @arg field, is located and decoded without updating the * xdr_stream. */ offset += len; if (xdr_decode_word(buf, offset, &checksum.len)) goto unwrap_failed; if (checksum.len > sizeof(gsd->gsd_scratch)) goto unwrap_failed; checksum.data = gsd->gsd_scratch; if (read_bytes_from_xdr_buf(buf, offset + XDR_UNIT, checksum.data, checksum.len)) goto unwrap_failed; maj_stat = gss_verify_mic(ctx, &databody_integ, &checksum); if (maj_stat != GSS_S_COMPLETE) goto bad_mic; /* The received seqno is protected by the checksum. 
*/ if (xdr_stream_decode_u32(xdr, &seq_num) < 0) goto unwrap_failed; if (seq_num != seq) goto bad_seqno; xdr_truncate_decode(xdr, XDR_UNIT + checksum.len); return 0; unwrap_failed: trace_rpcgss_svc_unwrap_failed(rqstp); return -EINVAL; bad_seqno: trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num); return -EINVAL; bad_mic: trace_rpcgss_svc_mic(rqstp, maj_stat); return -EINVAL; } /* * RFC 2203, Section 5.3.2.3 * * struct rpc_gss_priv_data { * opaque databody_priv<> * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; */ static noinline_for_stack int svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; u32 len, maj_stat, seq_num, offset; struct xdr_buf *buf = xdr->buf; unsigned int saved_len; if (xdr_stream_decode_u32(xdr, &len) < 0) goto unwrap_failed; if (rqstp->rq_deferred) { /* Already decrypted last time through! The sequence number * check at out_seq is unnecessary but harmless: */ goto out_seq; } if (len > xdr_stream_remaining(xdr)) goto unwrap_failed; offset = xdr_stream_pos(xdr); saved_len = buf->len; maj_stat = gss_unwrap(ctx, offset, offset + len, buf); if (maj_stat != GSS_S_COMPLETE) goto bad_unwrap; xdr->nwords -= XDR_QUADLEN(saved_len - buf->len); out_seq: /* gss_unwrap() decrypted the sequence number. */ if (xdr_stream_decode_u32(xdr, &seq_num) < 0) goto unwrap_failed; if (seq_num != seq) goto bad_seqno; return 0; unwrap_failed: trace_rpcgss_svc_unwrap_failed(rqstp); return -EINVAL; bad_seqno: trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num); return -EINVAL; bad_unwrap: trace_rpcgss_svc_unwrap(rqstp, maj_stat); return -EINVAL; } static enum svc_auth_status svcauth_gss_set_client(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; struct rsc *rsci = svcdata->rsci; struct rpc_gss_wire_cred *gc = &svcdata->clcred; int stat; rqstp->rq_auth_stat = rpc_autherr_badcred; /* * A gss export can be specified either by: * export *(sec=krb5,rw) * or by * export gss/krb5(rw) * The latter is deprecated; but for backwards compatibility reasons * the nfsd code will still fall back on trying it if the former * doesn't work; so we try to make both available to nfsd, below. 
*/ rqstp->rq_gssclient = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); if (rqstp->rq_gssclient == NULL) return SVC_DENIED; stat = svcauth_unix_set_client(rqstp); if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } static bool svcauth_gss_proc_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp, struct xdr_netobj *out_handle, int *major_status, u32 seq_num) { struct xdr_stream *xdr = &rqstp->rq_res_stream; struct rsc *rsci; bool rc; if (*major_status != GSS_S_COMPLETE) goto null_verifier; rsci = gss_svc_searchbyctx(cd, out_handle); if (rsci == NULL) { *major_status = GSS_S_NO_CONTEXT; goto null_verifier; } rc = svcauth_gss_encode_verf(rqstp, rsci->mechctx, seq_num); cache_put(&rsci->h, cd); return rc; null_verifier: return xdr_stream_encode_opaque_auth(xdr, RPC_AUTH_NULL, NULL, 0) > 0; } static void gss_free_in_token_pages(struct gssp_in_token *in_token) { int i; i = 0; while (in_token->pages[i]) put_page(in_token->pages[i++]); kfree(in_token->pages); in_token->pages = NULL; } static int gss_read_proxy_verf(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc, struct xdr_netobj *in_handle, struct gssp_in_token *in_token) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; unsigned int length, pgto_offs, pgfrom_offs; int pages, i, pgto, pgfrom; size_t to_offs, from_offs; u32 inlen; if (dup_netobj(in_handle, &gc->gc_ctx)) return SVC_CLOSE; /* * RFC 2203 Section 5.2.2 * * struct rpc_gss_init_arg { * opaque gss_token<>; * }; */ if (xdr_stream_decode_u32(xdr, &inlen) < 0) goto out_denied_free; if (inlen > xdr_stream_remaining(xdr)) goto out_denied_free; pages = DIV_ROUND_UP(inlen, PAGE_SIZE); in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); if (!in_token->pages) goto out_denied_free; in_token->page_base = 0; in_token->page_len = inlen; for (i = 0; i < pages; i++) { in_token->pages[i] = alloc_page(GFP_KERNEL); if (!in_token->pages[i]) { gss_free_in_token_pages(in_token); goto out_denied_free; } } length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p); memcpy(page_address(in_token->pages[0]), xdr->p, length); inlen -= length; to_offs = length; from_offs = rqstp->rq_arg.page_base; while (inlen) { pgto = to_offs >> PAGE_SHIFT; pgfrom = from_offs >> PAGE_SHIFT; pgto_offs = to_offs & ~PAGE_MASK; pgfrom_offs = from_offs & ~PAGE_MASK; length = min_t(unsigned int, inlen, min_t(unsigned int, PAGE_SIZE - pgto_offs, PAGE_SIZE - pgfrom_offs)); memcpy(page_address(in_token->pages[pgto]) + pgto_offs, page_address(rqstp->rq_arg.pages[pgfrom]) + pgfrom_offs, length); to_offs += length; from_offs += length; inlen -= length; } return 0; out_denied_free: kfree(in_handle->data); return SVC_DENIED; } /* * RFC 2203, Section 5.2.3.1. 
* * struct rpc_gss_init_res { * opaque handle<>; * unsigned int gss_major; * unsigned int gss_minor; * unsigned int seq_window; * opaque gss_token<>; * }; */ static bool svcxdr_encode_gss_init_res(struct xdr_stream *xdr, struct xdr_netobj *handle, struct xdr_netobj *gss_token, unsigned int major_status, unsigned int minor_status, u32 seq_num) { if (xdr_stream_encode_opaque(xdr, handle->data, handle->len) < 0) return false; if (xdr_stream_encode_u32(xdr, major_status) < 0) return false; if (xdr_stream_encode_u32(xdr, minor_status) < 0) return false; if (xdr_stream_encode_u32(xdr, seq_num) < 0) return false; if (xdr_stream_encode_opaque(xdr, gss_token->data, gss_token->len) < 0) return false; return true; } /* * Having read the cred already and found we're in the context * initiation case, read the verifier and initiate (or check the results * of) upcalls to userspace for help with context initiation. If * the upcall results are available, write the verifier and result. * Otherwise, drop the request pending an answer to the upcall. */ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct rsi *rsip, rsikey; __be32 *p; u32 len; int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); memset(&rsikey, 0, sizeof(rsikey)); if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) return SVC_CLOSE; /* * RFC 2203 Section 5.2.2 * * struct rpc_gss_init_arg { * opaque gss_token<>; * }; */ if (xdr_stream_decode_u32(xdr, &len) < 0) { kfree(rsikey.in_handle.data); return SVC_DENIED; } p = xdr_inline_decode(xdr, len); if (!p) { kfree(rsikey.in_handle.data); return SVC_DENIED; } rsikey.in_token.data = kmalloc(len, GFP_KERNEL); if (ZERO_OR_NULL_PTR(rsikey.in_token.data)) { kfree(rsikey.in_handle.data); return SVC_CLOSE; } memcpy(rsikey.in_token.data, p, len); rsikey.in_token.len = len; /* Perform upcall, or find upcall result: */ rsip = rsi_lookup(sn->rsi_cache, &rsikey); rsi_free(&rsikey); if (!rsip) return SVC_CLOSE; if (cache_check(sn->rsi_cache, &rsip->h, &rqstp->rq_chandle) < 0) /* No upcall result: */ return SVC_CLOSE; ret = SVC_CLOSE; if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle, &rsip->major_status, GSS_SEQ_WIN)) goto out; if (!svcxdr_set_accept_stat(rqstp)) goto out; if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle, &rsip->out_token, rsip->major_status, rsip->minor_status, GSS_SEQ_WIN)) goto out; ret = SVC_COMPLETE; out: cache_put(&rsip->h, sn->rsi_cache); return ret; } static int gss_proxy_save_rsc(struct cache_detail *cd, struct gssp_upcall_data *ud, uint64_t *handle) { struct rsc rsci, *rscp = NULL; static atomic64_t ctxhctr; long long ctxh; struct gss_api_mech *gm = NULL; time64_t expiry; int status; memset(&rsci, 0, sizeof(rsci)); /* context handle */ status = -ENOMEM; /* the handle needs to be just a unique id, * use a static counter */ ctxh = atomic64_inc_return(&ctxhctr); /* make a copy for the caller */ *handle = ctxh; /* make a copy for the rsc cache */ if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t))) goto out; rscp = rsc_lookup(cd, &rsci); if (!rscp) goto out; /* creds */ if (!ud->found_creds) { /* userspace seem buggy, we should always get at least a * mapping to nobody */ goto out; } else { struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; memset(&ud->creds, 0, sizeof(struct svc_cred)); status = -EOPNOTSUPP; /* get mech handle from OID */ gm = gss_mech_get_by_OID(&ud->mech_oid); if (!gm) goto out; 
rsci.cred.cr_gss_mech = gm; status = -EINVAL; /* mech-specific data: */ status = gss_import_sec_context(ud->out_handle.data, ud->out_handle.len, gm, &rsci.mechctx, &expiry, GFP_KERNEL); if (status) goto out; getboottime64(&boot); expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; rscp = rsc_update(cd, &rsci, rscp); status = 0; out: rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); else status = -ENOMEM; return status; } static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc) { struct xdr_netobj cli_handle; struct gssp_upcall_data ud; uint64_t handle; int status; int ret; struct net *net = SVC_NET(rqstp); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); memset(&ud, 0, sizeof(ud)); ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token); if (ret) return ret; ret = SVC_CLOSE; /* Perform synchronous upcall to gss-proxy */ status = gssp_accept_sec_context_upcall(net, &ud); if (status) goto out; trace_rpcgss_svc_accept_upcall(rqstp, ud.major_status, ud.minor_status); switch (ud.major_status) { case GSS_S_CONTINUE_NEEDED: cli_handle = ud.out_handle; break; case GSS_S_COMPLETE: status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle); if (status) goto out; cli_handle.data = (u8 *)&handle; cli_handle.len = sizeof(handle); break; default: goto out; } if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle, &ud.major_status, GSS_SEQ_WIN)) goto out; if (!svcxdr_set_accept_stat(rqstp)) goto out; if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle, &ud.out_token, ud.major_status, ud.minor_status, GSS_SEQ_WIN)) goto out; ret = SVC_COMPLETE; out: gss_free_in_token_pages(&ud.in_token); gssp_free_upcall_data(&ud); return ret; } /* * Try to set the sn->use_gss_proxy variable to a new value. We only allow * it to be changed if it's currently undefined (-1). If it's any other value * then return -EBUSY unless the type wouldn't have changed anyway. 
*/ static int set_gss_proxy(struct net *net, int type) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret; WARN_ON_ONCE(type != 0 && type != 1); ret = cmpxchg(&sn->use_gss_proxy, -1, type); if (ret != -1 && ret != type) return -EBUSY; return 0; } static bool use_gss_proxy(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* If use_gss_proxy is still undefined, then try to disable it */ if (sn->use_gss_proxy == -1) set_gss_proxy(net, 0); return sn->use_gss_proxy; } static noinline_for_stack int svcauth_gss_proc_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; u32 flavor, len; void *body; /* Call's verf field: */ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0) return SVC_GARBAGE; if (flavor != RPC_AUTH_NULL || len != 0) { rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) { rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } if (!use_gss_proxy(SVC_NET(rqstp))) return svcauth_gss_legacy_init(rqstp, gc); return svcauth_gss_proxy_init(rqstp, gc); } #ifdef CONFIG_PROC_FS static ssize_t write_gssp(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct net *net = pde_data(file_inode(file)); char tbuf[20]; unsigned long i; int res; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; res = kstrtoul(tbuf, 0, &i); if (res) return res; if (i != 1) return -EINVAL; res = set_gssp_clnt(net); if (res) return res; res = set_gss_proxy(net, 1); if (res) return res; return count; } static ssize_t read_gssp(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct net *net = pde_data(file_inode(file)); struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); unsigned long p = *ppos; char tbuf[10]; size_t len; snprintf(tbuf, sizeof(tbuf), "%d\n", sn->use_gss_proxy); len = strlen(tbuf); if (p >= len) return 0; len -= p; if (len > count) len = count; if (copy_to_user(buf, (void *)(tbuf+p), len)) return -EFAULT; *ppos += len; return len; } static const struct proc_ops use_gss_proxy_proc_ops = { .proc_open = nonseekable_open, .proc_write = write_gssp, .proc_read = read_gssp, }; static int create_use_gss_proxy_proc_entry(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct proc_dir_entry **p = &sn->use_gssp_proc; sn->use_gss_proxy = -1; *p = proc_create_data("use-gss-proxy", S_IFREG | 0600, sn->proc_net_rpc, &use_gss_proxy_proc_ops, net); if (!*p) return -ENOMEM; init_gssp_clnt(sn); return 0; } static void destroy_use_gss_proxy_proc_entry(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); if (sn->use_gssp_proc) { remove_proc_entry("use-gss-proxy", sn->proc_net_rpc); clear_gssp_clnt(sn); } } static ssize_t read_gss_krb5_enctypes(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct rpcsec_gss_oid oid = { .len = 9, .data = "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02", }; struct gss_api_mech *mech; ssize_t ret; mech = gss_mech_get_by_OID(&oid); if (!mech) return 0; if (!mech->gm_upcall_enctypes) { gss_mech_put(mech); return 0; } ret = simple_read_from_buffer(buf, count, ppos, mech->gm_upcall_enctypes, strlen(mech->gm_upcall_enctypes)); gss_mech_put(mech); return ret; } static const struct proc_ops gss_krb5_enctypes_proc_ops = { .proc_open = nonseekable_open, .proc_read = read_gss_krb5_enctypes, }; static int 
create_krb5_enctypes_proc_entry(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); sn->gss_krb5_enctypes = proc_create_data("gss_krb5_enctypes", S_IFREG | 0444, sn->proc_net_rpc, &gss_krb5_enctypes_proc_ops, net); return sn->gss_krb5_enctypes ? 0 : -ENOMEM; } static void destroy_krb5_enctypes_proc_entry(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); if (sn->gss_krb5_enctypes) remove_proc_entry("gss_krb5_enctypes", sn->proc_net_rpc); } #else /* CONFIG_PROC_FS */ static int create_use_gss_proxy_proc_entry(struct net *net) { return 0; } static void destroy_use_gss_proxy_proc_entry(struct net *net) {} static int create_krb5_enctypes_proc_entry(struct net *net) { return 0; } static void destroy_krb5_enctypes_proc_entry(struct net *net) {} #endif /* CONFIG_PROC_FS */ /* * The Call's credential body should contain a struct rpc_gss_cred_t. * * RFC 2203 Section 5 * * struct rpc_gss_cred_t { * union switch (unsigned int version) { * case RPCSEC_GSS_VERS_1: * struct { * rpc_gss_proc_t gss_proc; * unsigned int seq_num; * rpc_gss_service_t service; * opaque handle<>; * } rpc_gss_cred_vers_1_t; * } * }; */ static bool svcauth_gss_decode_credbody(struct xdr_stream *xdr, struct rpc_gss_wire_cred *gc, __be32 **rpcstart) { ssize_t handle_len; u32 body_len; __be32 *p; p = xdr_inline_decode(xdr, XDR_UNIT); if (!p) return false; /* * start of rpc packet is 7 u32's back from here: * xid direction rpcversion prog vers proc flavour */ *rpcstart = p - 7; body_len = be32_to_cpup(p); if (body_len > RPC_MAX_AUTH_SIZE) return false; /* struct rpc_gss_cred_t */ if (xdr_stream_decode_u32(xdr, &gc->gc_v) < 0) return false; if (xdr_stream_decode_u32(xdr, &gc->gc_proc) < 0) return false; if (xdr_stream_decode_u32(xdr, &gc->gc_seq) < 0) return false; if (xdr_stream_decode_u32(xdr, &gc->gc_svc) < 0) return false; handle_len = xdr_stream_decode_opaque_inline(xdr, (void **)&gc->gc_ctx.data, body_len); if (handle_len < 0) return false; if (body_len != XDR_UNIT * 5 + xdr_align_size(handle_len)) return false; gc->gc_ctx.len = handle_len; return true; } /** * svcauth_gss_accept - Decode and validate incoming RPC_AUTH_GSS credential * @rqstp: RPC transaction * * Return values: * %SVC_OK: Success * %SVC_COMPLETE: GSS context lifetime event * %SVC_DENIED: Credential or verifier is not valid * %SVC_GARBAGE: Failed to decode credential or verifier * %SVC_CLOSE: Temporary failure * * The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531). 
*/ static enum svc_auth_status svcauth_gss_accept(struct svc_rqst *rqstp) { struct gss_svc_data *svcdata = rqstp->rq_auth_data; __be32 *rpcstart; struct rpc_gss_wire_cred *gc; struct rsc *rsci = NULL; int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); rqstp->rq_auth_stat = rpc_autherr_badcred; if (!svcdata) svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); if (!svcdata) goto auth_err; rqstp->rq_auth_data = svcdata; svcdata->gsd_databody_offset = 0; svcdata->rsci = NULL; gc = &svcdata->clcred; if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart)) goto auth_err; if (gc->gc_v != RPC_GSS_VERSION) goto auth_err; switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: if (rqstp->rq_proc != 0) goto auth_err; return svcauth_gss_proc_init(rqstp, gc); case RPC_GSS_PROC_DESTROY: if (rqstp->rq_proc != 0) goto auth_err; fallthrough; case RPC_GSS_PROC_DATA: rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); if (!rsci) goto auth_err; switch (svcauth_gss_verify_header(rqstp, rsci, rpcstart, gc)) { case SVC_OK: break; case SVC_DENIED: goto auth_err; case SVC_DROP: goto drop; } break; default: if (rqstp->rq_proc != 0) goto auth_err; rqstp->rq_auth_stat = rpc_autherr_rejectedcred; goto auth_err; } /* now act upon the command: */ switch (gc->gc_proc) { case RPC_GSS_PROC_DESTROY: if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; if (!svcxdr_set_accept_stat(rqstp)) goto auth_err; /* Delete the entry from the cache_list and call cache_put */ sunrpc_cache_unhash(sn->rsc_cache, &rsci->h); goto complete; case RPC_GSS_PROC_DATA: rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; if (!svcxdr_set_accept_stat(rqstp)) goto auth_err; svcdata->gsd_databody_offset = xdr_stream_pos(&rqstp->rq_res_stream); rqstp->rq_cred = rsci->cred; get_group_info(rsci->cred.cr_group_info); rqstp->rq_auth_stat = rpc_autherr_badcred; switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: /* placeholders for body length and seq. number: */ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2); if (svcauth_gss_unwrap_integ(rqstp, gc->gc_seq, rsci->mechctx)) goto garbage_args; svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE); break; case RPC_GSS_SVC_PRIVACY: /* placeholders for body length and seq. number: */ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2); if (svcauth_gss_unwrap_priv(rqstp, gc->gc_seq, rsci->mechctx)) goto garbage_args; svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE * 2); break; default: goto auth_err; } svcdata->rsci = rsci; cache_get(&rsci->h); rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( rsci->mechctx->mech_type, GSS_C_QOP_DEFAULT, gc->gc_svc); ret = SVC_OK; trace_rpcgss_svc_authenticate(rqstp, gc); goto out; } garbage_args: ret = SVC_GARBAGE; goto out; auth_err: xdr_truncate_encode(&rqstp->rq_res_stream, XDR_UNIT * 2); ret = SVC_DENIED; goto out; complete: ret = SVC_COMPLETE; goto out; drop: ret = SVC_CLOSE; out: if (rsci) cache_put(&rsci->h, sn->rsc_cache); return ret; } static u32 svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd) { u32 offset; /* Release can be called twice, but we only wrap once. */ offset = gsd->gsd_databody_offset; gsd->gsd_databody_offset = 0; /* AUTH_ERROR replies are not wrapped. 
*/ if (rqstp->rq_auth_stat != rpc_auth_ok) return 0; /* Also don't wrap if the accept_stat is nonzero: */ if (*rqstp->rq_accept_statp != rpc_success) return 0; return offset; } /* * RFC 2203, Section 5.3.2.2 * * struct rpc_gss_integ_data { * opaque databody_integ<>; * opaque checksum<>; * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; * * The RPC Reply message has already been XDR-encoded. rq_res_stream * is now positioned so that the checksum can be written just past * the RPC Reply message. */ static int svcauth_gss_wrap_integ(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = rqstp->rq_auth_data; struct xdr_stream *xdr = &rqstp->rq_res_stream; struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *buf = xdr->buf; struct xdr_buf databody_integ; struct xdr_netobj checksum; u32 offset, maj_stat; offset = svcauth_gss_prepare_to_wrap(rqstp, gsd); if (!offset) goto out; if (xdr_buf_subsegment(buf, &databody_integ, offset + XDR_UNIT, buf->len - offset - XDR_UNIT)) goto wrap_failed; /* Buffer space for these has already been reserved in * svcauth_gss_accept(). */ if (xdr_encode_word(buf, offset, databody_integ.len)) goto wrap_failed; if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq)) goto wrap_failed; checksum.data = gsd->gsd_scratch; maj_stat = gss_get_mic(gsd->rsci->mechctx, &databody_integ, &checksum); if (maj_stat != GSS_S_COMPLETE) goto bad_mic; if (xdr_stream_encode_opaque(xdr, checksum.data, checksum.len) < 0) goto wrap_failed; xdr_commit_encode(xdr); out: return 0; bad_mic: trace_rpcgss_svc_get_mic(rqstp, maj_stat); return -EINVAL; wrap_failed: trace_rpcgss_svc_wrap_failed(rqstp); return -EINVAL; } /* * RFC 2203, Section 5.3.2.3 * * struct rpc_gss_priv_data { * opaque databody_priv<> * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; * * gss_wrap() expands the size of the RPC message payload in the * response buffer. The main purpose of svcauth_gss_wrap_priv() * is to ensure there is adequate space in the response buffer to * avoid overflow during the wrap. */ static int svcauth_gss_wrap_priv(struct svc_rqst *rqstp) { struct gss_svc_data *gsd = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc = &gsd->clcred; struct xdr_buf *buf = &rqstp->rq_res; struct kvec *head = buf->head; struct kvec *tail = buf->tail; u32 offset, pad, maj_stat; __be32 *p; offset = svcauth_gss_prepare_to_wrap(rqstp, gsd); if (!offset) return 0; /* * Buffer space for this field has already been reserved * in svcauth_gss_accept(). Note that the GSS sequence * number is encrypted along with the RPC reply payload. */ if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq)) goto wrap_failed; /* * If there is currently tail data, make sure there is * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in * the page, and move the current tail data such that * there is RPC_MAX_AUTH_SIZE slack space available in * both the head and tail. */ if (tail->iov_base) { if (tail->iov_base >= head->iov_base + PAGE_SIZE) goto wrap_failed; if (tail->iov_base < head->iov_base) goto wrap_failed; if (tail->iov_len + head->iov_len + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto wrap_failed; memmove(tail->iov_base + RPC_MAX_AUTH_SIZE, tail->iov_base, tail->iov_len); tail->iov_base += RPC_MAX_AUTH_SIZE; } /* * If there is no current tail data, make sure there is * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the * allotted page, and set up tail information such that there * is RPC_MAX_AUTH_SIZE slack space available in both the * head and tail. 
*/ if (!tail->iov_base) { if (head->iov_len + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto wrap_failed; tail->iov_base = head->iov_base + head->iov_len + RPC_MAX_AUTH_SIZE; tail->iov_len = 0; } maj_stat = gss_wrap(gsd->rsci->mechctx, offset + XDR_UNIT, buf, buf->pages); if (maj_stat != GSS_S_COMPLETE) goto bad_wrap; /* Wrapping can change the size of databody_priv. */ if (xdr_encode_word(buf, offset, buf->len - offset - XDR_UNIT)) goto wrap_failed; pad = xdr_pad_size(buf->len - offset - XDR_UNIT); p = (__be32 *)(tail->iov_base + tail->iov_len); memset(p, 0, pad); tail->iov_len += pad; buf->len += pad; return 0; wrap_failed: trace_rpcgss_svc_wrap_failed(rqstp); return -EINVAL; bad_wrap: trace_rpcgss_svc_wrap(rqstp, maj_stat); return -ENOMEM; } /** * svcauth_gss_release - Wrap payload and release resources * @rqstp: RPC transaction context * * Return values: * %0: the Reply is ready to be sent * %-ENOMEM: failed to allocate memory * %-EINVAL: encoding error */ static int svcauth_gss_release(struct svc_rqst *rqstp) { struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); struct gss_svc_data *gsd = rqstp->rq_auth_data; struct rpc_gss_wire_cred *gc; int stat; if (!gsd) goto out; gc = &gsd->clcred; if (gc->gc_proc != RPC_GSS_PROC_DATA) goto out; switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: stat = svcauth_gss_wrap_integ(rqstp); if (stat) goto out_err; break; case RPC_GSS_SVC_PRIVACY: stat = svcauth_gss_wrap_priv(rqstp); if (stat) goto out_err; break; /* * For any other gc_svc value, svcauth_gss_accept() already set * the auth_error appropriately; just fall through: */ } out: stat = 0; out_err: if (rqstp->rq_client) auth_domain_put(rqstp->rq_client); rqstp->rq_client = NULL; if (rqstp->rq_gssclient) auth_domain_put(rqstp->rq_gssclient); rqstp->rq_gssclient = NULL; if (rqstp->rq_cred.cr_group_info) put_group_info(rqstp->rq_cred.cr_group_info); rqstp->rq_cred.cr_group_info = NULL; if (gsd && gsd->rsci) { cache_put(&gsd->rsci->h, sn->rsc_cache); gsd->rsci = NULL; } return stat; } static void svcauth_gss_domain_release_rcu(struct rcu_head *head) { struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head); struct gss_domain *gd = container_of(dom, struct gss_domain, h); kfree(dom->name); kfree(gd); } static void svcauth_gss_domain_release(struct auth_domain *dom) { call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu); } static rpc_authflavor_t svcauth_gss_pseudoflavor(struct svc_rqst *rqstp) { return svcauth_gss_flavor(rqstp->rq_gssclient); } static struct auth_ops svcauthops_gss = { .name = "rpcsec_gss", .owner = THIS_MODULE, .flavour = RPC_AUTH_GSS, .accept = svcauth_gss_accept, .release = svcauth_gss_release, .domain_release = svcauth_gss_domain_release, .set_client = svcauth_gss_set_client, .pseudoflavor = svcauth_gss_pseudoflavor, }; static int rsi_cache_create_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd; int err; cd = cache_create_net(&rsi_cache_template, net); if (IS_ERR(cd)) return PTR_ERR(cd); err = cache_register_net(cd, net); if (err) { cache_destroy_net(cd, net); return err; } sn->rsi_cache = cd; return 0; } static void rsi_cache_destroy_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd = sn->rsi_cache; sn->rsi_cache = NULL; cache_purge(cd); cache_unregister_net(cd, net); cache_destroy_net(cd, net); } static int rsc_cache_create_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct 
cache_detail *cd; int err; cd = cache_create_net(&rsc_cache_template, net); if (IS_ERR(cd)) return PTR_ERR(cd); err = cache_register_net(cd, net); if (err) { cache_destroy_net(cd, net); return err; } sn->rsc_cache = cd; return 0; } static void rsc_cache_destroy_net(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct cache_detail *cd = sn->rsc_cache; sn->rsc_cache = NULL; cache_purge(cd); cache_unregister_net(cd, net); cache_destroy_net(cd, net); } int gss_svc_init_net(struct net *net) { int rv; rv = rsc_cache_create_net(net); if (rv) return rv; rv = rsi_cache_create_net(net); if (rv) goto out1; rv = create_use_gss_proxy_proc_entry(net); if (rv) goto out2; rv = create_krb5_enctypes_proc_entry(net); if (rv) goto out3; return 0; out3: destroy_use_gss_proxy_proc_entry(net); out2: rsi_cache_destroy_net(net); out1: rsc_cache_destroy_net(net); return rv; } void gss_svc_shutdown_net(struct net *net) { destroy_krb5_enctypes_proc_entry(net); destroy_use_gss_proxy_proc_entry(net); rsi_cache_destroy_net(net); rsc_cache_destroy_net(net); } int gss_svc_init(void) { return svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); } void gss_svc_shutdown(void) { svc_auth_unregister(RPC_AUTH_GSS); }
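/*
 * Illustrative user-space sketch (an assumption for illustration, not part
 * of the kernel sources above): the write_gssp()/read_gssp() handlers shown
 * earlier back the /proc/net/rpc/use-gss-proxy file. A user-space daemon
 * such as gssproxy typically switches the server to the proxy upcall path
 * by writing "1" once. write_gssp() accepts only the value 1, and
 * set_gss_proxy() refuses to change the setting once it has left its
 * initial -1 state, so the write below can fail with EBUSY if the legacy
 * upcall path was already chosen for this network namespace.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/net/rpc/use-gss-proxy", O_WRONLY);

	if (fd < 0) {
		perror("open use-gss-proxy");
		return 1;
	}
	/* write_gssp() parses the buffer with kstrtoul(); "1" enables gss-proxy */
	if (write(fd, "1", 1) != 1) {
		perror("write use-gss-proxy");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}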
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ /* Copyright (c) 2008-2019, IBM Corporation */ #include <linux/errno.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/xarray.h> #include <net/addrconf.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> #include <rdma/uverbs_ioctl.h> #include "siw.h" #include "siw_verbs.h" #include "siw_mem.h" static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { [SIW_QP_STATE_IDLE] = IB_QPS_INIT, [SIW_QP_STATE_RTR] = IB_QPS_RTR, [SIW_QP_STATE_RTS] = IB_QPS_RTS, [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, [SIW_QP_STATE_ERROR] = IB_QPS_ERR }; static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = SIW_QP_STATE_IDLE, [IB_QPS_INIT] = SIW_QP_STATE_IDLE, [IB_QPS_RTR] = SIW_QP_STATE_RTR, [IB_QPS_RTS] = SIW_QP_STATE_RTS, [IB_QPS_SQD] = SIW_QP_STATE_CLOSING, [IB_QPS_SQE] = SIW_QP_STATE_TERMINATE, [IB_QPS_ERR] = SIW_QP_STATE_ERROR }; static char ib_qp_state_to_string[IB_QPS_ERR + 1][sizeof("RESET")] = { [IB_QPS_RESET] = "RESET", [IB_QPS_INIT] = "INIT", [IB_QPS_RTR] = "RTR", [IB_QPS_RTS] = "RTS", [IB_QPS_SQD] = "SQD", [IB_QPS_SQE] = "SQE", [IB_QPS_ERR] = "ERR" }; void siw_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { struct siw_user_mmap_entry *entry = to_siw_mmap_entry(rdma_entry); kfree(entry); } int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) { struct siw_ucontext *uctx = to_siw_ctx(ctx); size_t size = vma->vm_end - vma->vm_start; struct rdma_user_mmap_entry *rdma_entry; struct siw_user_mmap_entry *entry; int rv = -EINVAL; /* * Must be page aligned */ if (vma->vm_start & (PAGE_SIZE - 1)) { pr_warn("siw: mmap not page aligned\n"); return -EINVAL; } rdma_entry = rdma_user_mmap_entry_get(&uctx->base_ucontext, vma); if (!rdma_entry) { siw_dbg(&uctx->sdev->base_dev, "mmap lookup failed: %lu, %#zx\n", vma->vm_pgoff, size); return -EINVAL; } entry = to_siw_mmap_entry(rdma_entry); rv = remap_vmalloc_range(vma, entry->address, 0); if (rv) pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, size); rdma_user_mmap_entry_put(rdma_entry); return rv; } int siw_alloc_ucontext(struct ib_ucontext
*base_ctx, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(base_ctx->device); struct siw_ucontext *ctx = to_siw_ctx(base_ctx); struct siw_uresp_alloc_ctx uresp = {}; int rv; if (atomic_inc_return(&sdev->num_ctx) > SIW_MAX_CONTEXT) { rv = -ENOMEM; goto err_out; } ctx->sdev = sdev; uresp.dev_id = sdev->vendor_part_id; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; siw_dbg(base_ctx->device, "success. now %d context(s)\n", atomic_read(&sdev->num_ctx)); return 0; err_out: atomic_dec(&sdev->num_ctx); siw_dbg(base_ctx->device, "failure %d. now %d context(s)\n", rv, atomic_read(&sdev->num_ctx)); return rv; } void siw_dealloc_ucontext(struct ib_ucontext *base_ctx) { struct siw_ucontext *uctx = to_siw_ctx(base_ctx); atomic_dec(&uctx->sdev->num_ctx); } int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(base_dev); if (udata->inlen || udata->outlen) return -EINVAL; memset(attr, 0, sizeof(*attr)); /* Revisit atomic caps if RFC 7306 gets supported */ attr->atomic_cap = 0; attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG; attr->max_cq = sdev->attrs.max_cq; attr->max_cqe = sdev->attrs.max_cqe; attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL; attr->max_mr = sdev->attrs.max_mr; attr->max_mw = sdev->attrs.max_mw; attr->max_mr_size = ~0ull; attr->max_pd = sdev->attrs.max_pd; attr->max_qp = sdev->attrs.max_qp; attr->max_qp_init_rd_atom = sdev->attrs.max_ird; attr->max_qp_rd_atom = sdev->attrs.max_ord; attr->max_qp_wr = sdev->attrs.max_qp_wr; attr->max_recv_sge = sdev->attrs.max_sge; attr->max_res_rd_atom = sdev->attrs.max_qp * sdev->attrs.max_ird; attr->max_send_sge = sdev->attrs.max_sge; attr->max_sge_rd = sdev->attrs.max_sge_rd; attr->max_srq = sdev->attrs.max_srq; attr->max_srq_sge = sdev->attrs.max_srq_sge; attr->max_srq_wr = sdev->attrs.max_srq_wr; attr->page_size_cap = PAGE_SIZE; attr->vendor_id = SIW_VENDOR_ID; attr->vendor_part_id = sdev->vendor_part_id; addrconf_addr_eui48((u8 *)&attr->sys_image_guid, sdev->raw_gid); return 0; } int siw_query_port(struct ib_device *base_dev, u32 port, struct ib_port_attr *attr) { struct net_device *ndev; int rv; memset(attr, 0, sizeof(*attr)); rv = ib_get_eth_speed(base_dev, port, &attr->active_speed, &attr->active_width); if (rv) return rv; ndev = ib_device_get_netdev(base_dev, SIW_PORT); if (!ndev) return -ENODEV; attr->gid_tbl_len = 1; attr->max_msg_sz = -1; attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu); attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); attr->state = ib_get_curr_port_state(ndev); attr->phys_state = attr->state == IB_PORT_ACTIVE ? 
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; /* * All zero * * attr->lid = 0; * attr->bad_pkey_cntr = 0; * attr->qkey_viol_cntr = 0; * attr->sm_lid = 0; * attr->lmc = 0; * attr->max_vl_num = 0; * attr->sm_sl = 0; * attr->subnet_timeout = 0; * attr->init_type_repy = 0; */ dev_put(ndev); return rv; } int siw_get_port_immutable(struct ib_device *base_dev, u32 port, struct ib_port_immutable *port_immutable) { struct ib_port_attr attr; int rv = siw_query_port(base_dev, port, &attr); if (rv) return rv; port_immutable->gid_tbl_len = attr.gid_tbl_len; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; return 0; } int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, union ib_gid *gid) { struct siw_device *sdev = to_siw_dev(base_dev); /* subnet_prefix == interface_id == 0; */ memset(gid, 0, sizeof(*gid)); memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); return 0; } int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); if (atomic_inc_return(&sdev->num_pd) > SIW_MAX_PD) { atomic_dec(&sdev->num_pd); return -ENOMEM; } siw_dbg_pd(pd, "now %d PD's(s)\n", atomic_read(&sdev->num_pd)); return 0; } int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); siw_dbg_pd(pd, "free PD\n"); atomic_dec(&sdev->num_pd); return 0; } void siw_qp_get_ref(struct ib_qp *base_qp) { siw_qp_get(to_siw_qp(base_qp)); } void siw_qp_put_ref(struct ib_qp *base_qp) { siw_qp_put(to_siw_qp(base_qp)); } static struct rdma_user_mmap_entry * siw_mmap_entry_insert(struct siw_ucontext *uctx, void *address, size_t length, u64 *offset) { struct siw_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); int rv; *offset = SIW_INVAL_UOBJ_KEY; if (!entry) return NULL; entry->address = address; rv = rdma_user_mmap_entry_insert(&uctx->base_ucontext, &entry->rdma_entry, length); if (rv) { kfree(entry); return NULL; } *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); return &entry->rdma_entry; } /* * siw_create_qp() * * Create QP of requested size on given device. * * @qp: Queue pait * @attrs: Initial QP attributes. * @udata: used to provide QP ID, SQ and RQ size back to user. 
*/ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { struct ib_pd *pd = ibqp->pd; struct siw_qp *qp = to_siw_qp(ibqp); struct ib_device *base_dev = pd->device; struct siw_device *sdev = to_siw_dev(base_dev); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; siw_dbg(base_dev, "create new QP\n"); if (attrs->create_flags) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); rv = -ENOMEM; goto err_atomic; } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); rv = -EOPNOTSUPP; goto err_atomic; } if ((attrs->cap.max_send_wr > SIW_MAX_QP_WR) || (attrs->cap.max_recv_wr > SIW_MAX_QP_WR) || (attrs->cap.max_send_sge > SIW_MAX_SGE) || (attrs->cap.max_recv_sge > SIW_MAX_SGE)) { siw_dbg(base_dev, "QP size error\n"); rv = -EINVAL; goto err_atomic; } if (attrs->cap.max_inline_data > SIW_MAX_INLINE) { siw_dbg(base_dev, "max inline send: %d > %d\n", attrs->cap.max_inline_data, (int)SIW_MAX_INLINE); rv = -EINVAL; goto err_atomic; } /* * NOTE: we don't allow for a QP unable to hold any SQ WQE */ if (attrs->cap.max_send_wr == 0) { siw_dbg(base_dev, "QP must have send queue\n"); rv = -EINVAL; goto err_atomic; } if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; goto err_atomic; } init_rwsem(&qp->state_lock); spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); spin_lock_init(&qp->orq_lock); rv = siw_qp_add(sdev, qp); if (rv) goto err_atomic; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); num_rqe = attrs->cap.max_recv_wr; if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); else qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe)); if (qp->sendq == NULL) { rv = -ENOMEM; goto err_out_xa; } if (attrs->sq_sig_type != IB_SIGNAL_REQ_WR) { if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) qp->attrs.flags |= SIW_SIGNAL_ALL_WR; else { rv = -EINVAL; goto err_out_xa; } } qp->pd = pd; qp->scq = to_siw_cq(attrs->send_cq); qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* * SRQ support. * Verbs 6.3.7: ignore RQ size, if SRQ present * Verbs 6.3.5: do not check PD of SRQ against PD of QP */ qp->srq = to_siw_srq(attrs->srq); qp->attrs.rq_size = 0; siw_dbg(base_dev, "QP [%u]: SRQ attached\n", qp->base_qp.qp_num); } else if (num_rqe) { if (udata) qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); else qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe)); if (qp->recvq == NULL) { rv = -ENOMEM; goto err_out_xa; } qp->attrs.rq_size = num_rqe; } qp->attrs.sq_size = num_sqe; qp->attrs.sq_max_sges = attrs->cap.max_send_sge; qp->attrs.rq_max_sges = attrs->cap.max_recv_sge; /* Make those two tunables fixed for now. 
*/ qp->tx_ctx.gso_seg_limit = 1; qp->tx_ctx.zcopy_tx = zcopy_tx; qp->attrs.state = SIW_QP_STATE_IDLE; if (udata) { struct siw_uresp_create_qp uresp = {}; uresp.num_sqe = num_sqe; uresp.num_rqe = num_rqe; uresp.qp_id = qp_id(qp); if (qp->sendq) { length = num_sqe * sizeof(struct siw_sqe); qp->sq_entry = siw_mmap_entry_insert(uctx, qp->sendq, length, &uresp.sq_key); if (!qp->sq_entry) { rv = -ENOMEM; goto err_out_xa; } } if (qp->recvq) { length = num_rqe * sizeof(struct siw_rqe); qp->rq_entry = siw_mmap_entry_insert(uctx, qp->recvq, length, &uresp.rq_key); if (!qp->rq_entry) { uresp.sq_key = SIW_INVAL_UOBJ_KEY; rv = -ENOMEM; goto err_out_xa; } } if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out_xa; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out_xa; } qp->tx_cpu = siw_get_tx_cpu(sdev); if (qp->tx_cpu < 0) { rv = -EINVAL; goto err_out_xa; } INIT_LIST_HEAD(&qp->devq); spin_lock_irqsave(&sdev->lock, flags); list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); init_completion(&qp->qp_free); return 0; err_out_xa: xa_erase(&sdev->qp_xa, qp_id(qp)); if (uctx) { rdma_user_mmap_entry_remove(qp->sq_entry); rdma_user_mmap_entry_remove(qp->rq_entry); } vfree(qp->sendq); vfree(qp->recvq); err_atomic: atomic_dec(&sdev->num_qp); return rv; } /* * Minimum siw_query_qp() verb interface. * * @qp_attr_mask is not used but all available information is provided */ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct siw_qp *qp; struct net_device *ndev; if (base_qp && qp_attr && qp_init_attr) qp = to_siw_qp(base_qp); else return -EINVAL; ndev = ib_device_get_netdev(base_qp->device, SIW_PORT); if (!ndev) return -ENODEV; qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; qp_attr->cap.max_recv_wr = qp->attrs.rq_size; qp_attr->cap.max_recv_sge = qp->attrs.rq_max_sges; qp_attr->path_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu)); qp_attr->max_rd_atomic = qp->attrs.irq_size; qp_attr->max_dest_rd_atomic = qp->attrs.orq_size; qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; qp_init_attr->qp_type = base_qp->qp_type; qp_init_attr->send_cq = base_qp->send_cq; qp_init_attr->recv_cq = base_qp->recv_cq; qp_init_attr->srq = base_qp->srq; qp_init_attr->cap = qp_attr->cap; dev_put(ndev); return 0; } int siw_verbs_modify_qp(struct ib_qp *base_qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct siw_qp_attrs new_attrs; enum siw_qp_attr_mask siw_attr_mask = 0; struct siw_qp *qp = to_siw_qp(base_qp); int rv = 0; if (!attr_mask) return 0; if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; memset(&new_attrs, 0, sizeof(new_attrs)); if (attr_mask & IB_QP_ACCESS_FLAGS) { siw_attr_mask = SIW_QP_ATTR_ACCESS_FLAGS; if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) new_attrs.flags |= SIW_RDMA_READ_ENABLED; if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) new_attrs.flags |= SIW_RDMA_WRITE_ENABLED; if (attr->qp_access_flags & IB_ACCESS_MW_BIND) new_attrs.flags |= SIW_RDMA_BIND_ENABLED; } if (attr_mask & IB_QP_STATE) { siw_dbg_qp(qp, "desired IB QP state: %s\n", ib_qp_state_to_string[attr->qp_state]); new_attrs.state = ib_qp_state_to_siw_qp_state[attr->qp_state]; if (new_attrs.state > SIW_QP_STATE_RTS) qp->tx_ctx.tx_suspend = 1; siw_attr_mask |= 
SIW_QP_ATTR_STATE; } if (!siw_attr_mask) goto out; down_write(&qp->state_lock); rv = siw_qp_modify(qp, &new_attrs, siw_attr_mask); up_write(&qp->state_lock); out: return rv; } int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) { struct siw_qp *qp = to_siw_qp(base_qp); struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); struct siw_qp_attrs qp_attrs; siw_dbg_qp(qp, "state %d\n", qp->attrs.state); /* * Mark QP as in process of destruction to prevent from * any async callbacks to RDMA core */ qp->attrs.flags |= SIW_QP_IN_DESTROY; qp->rx_stream.rx_suspend = 1; if (uctx) { rdma_user_mmap_entry_remove(qp->sq_entry); rdma_user_mmap_entry_remove(qp->rq_entry); } down_write(&qp->state_lock); qp_attrs.state = SIW_QP_STATE_ERROR; siw_qp_modify(qp, &qp_attrs, SIW_QP_ATTR_STATE); if (qp->cep) { siw_cep_put(qp->cep); qp->cep = NULL; } up_write(&qp->state_lock); kfree(qp->tx_ctx.mpa_crc_hd); kfree(qp->rx_stream.mpa_crc_hd); qp->scq = qp->rcq = NULL; siw_qp_put(qp); wait_for_completion(&qp->qp_free); return 0; } /* * siw_copy_inline_sgl() * * Prepare sgl of inlined data for sending. For userland callers * function checks if given buffer addresses and len's are within * process context bounds. * Data from all provided sge's are copied together into the wqe, * referenced by a single sge. */ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, struct siw_sqe *sqe) { struct ib_sge *core_sge = core_wr->sg_list; void *kbuf = &sqe->sge[1]; int num_sge = core_wr->num_sge, bytes = 0; sqe->sge[0].laddr = (uintptr_t)kbuf; sqe->sge[0].lkey = 0; while (num_sge--) { if (!core_sge->length) { core_sge++; continue; } bytes += core_sge->length; if (bytes > SIW_MAX_INLINE) { bytes = -EINVAL; break; } memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr), core_sge->length); kbuf += core_sge->length; core_sge++; } sqe->sge[0].length = max(bytes, 0); sqe->num_sge = bytes > 0 ? 1 : 0; return bytes; } /* Complete SQ WR's without processing */ static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { int rv = 0; while (wr) { struct siw_sqe sqe = {}; switch (wr->opcode) { case IB_WR_RDMA_WRITE: sqe.opcode = SIW_OP_WRITE; break; case IB_WR_RDMA_READ: sqe.opcode = SIW_OP_READ; break; case IB_WR_RDMA_READ_WITH_INV: sqe.opcode = SIW_OP_READ_LOCAL_INV; break; case IB_WR_SEND: sqe.opcode = SIW_OP_SEND; break; case IB_WR_SEND_WITH_IMM: sqe.opcode = SIW_OP_SEND_WITH_IMM; break; case IB_WR_SEND_WITH_INV: sqe.opcode = SIW_OP_SEND_REMOTE_INV; break; case IB_WR_LOCAL_INV: sqe.opcode = SIW_OP_INVAL_STAG; break; case IB_WR_REG_MR: sqe.opcode = SIW_OP_REG_MR; break; default: rv = -EINVAL; break; } if (!rv) { sqe.id = wr->wr_id; rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); } if (rv) { if (bad_wr) *bad_wr = wr; break; } wr = wr->next; } return rv; } /* Complete RQ WR's without processing */ static int siw_rq_flush_wr(struct siw_qp *qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_rqe rqe = {}; int rv = 0; while (wr) { rqe.id = wr->wr_id; rv = siw_rqe_complete(qp, &rqe, 0, 0, SIW_WC_WR_FLUSH_ERR); if (rv) { if (bad_wr) *bad_wr = wr; break; } wr = wr->next; } return rv; } /* * siw_post_send() * * Post a list of S-WR's to a SQ. * * @base_qp: Base QP contained in siw QP * @wr: Null terminated list of user WR's * @bad_wr: Points to failing WR in case of synchronous failure. 
*/ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct siw_qp *qp = to_siw_qp(base_qp); struct siw_wqe *wqe = tx_wqe(qp); unsigned long flags; int rv = 0; if (wr && !rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "wr must be empty for user mapped sq\n"); *bad_wr = wr; return -EINVAL; } /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * ERROR state is final, so we can be sure * this state will not change as long as the QP * exists. * * This handles an ib_drain_sq() call with * a concurrent request to set the QP state * to ERROR. */ rv = siw_sq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } return rv; } if (unlikely(qp->attrs.state != SIW_QP_STATE_RTS)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * Immediately flush this WR to CQ, if QP * is in ERROR state. SQ is guaranteed to * be empty, so WR complets in-order. * * Typically triggered by ib_drain_sq(). */ rv = siw_sq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } up_read(&qp->state_lock); return rv; } spin_lock_irqsave(&qp->sq_lock, flags); while (wr) { u32 idx = qp->sq_put % qp->attrs.sq_size; struct siw_sqe *sqe = &qp->sendq[idx]; if (sqe->flags) { siw_dbg_qp(qp, "sq full\n"); rv = -ENOMEM; break; } if (wr->num_sge > qp->attrs.sq_max_sges) { siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } sqe->id = wr->wr_id; if ((wr->send_flags & IB_SEND_SIGNALED) || (qp->attrs.flags & SIW_SIGNAL_ALL_WR)) sqe->flags |= SIW_WQE_SIGNALLED; if (wr->send_flags & IB_SEND_FENCE) sqe->flags |= SIW_WQE_READ_FENCE; switch (wr->opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_INV: if (wr->send_flags & IB_SEND_SOLICITED) sqe->flags |= SIW_WQE_SOLICITED; if (!(wr->send_flags & IB_SEND_INLINE)) { siw_copy_sgl(wr->sg_list, sqe->sge, wr->num_sge); sqe->num_sge = wr->num_sge; } else { rv = siw_copy_inline_sgl(wr, sqe); if (rv <= 0) { rv = -EINVAL; break; } sqe->flags |= SIW_WQE_INLINE; sqe->num_sge = 1; } if (wr->opcode == IB_WR_SEND) sqe->opcode = SIW_OP_SEND; else { sqe->opcode = SIW_OP_SEND_REMOTE_INV; sqe->rkey = wr->ex.invalidate_rkey; } break; case IB_WR_RDMA_READ_WITH_INV: case IB_WR_RDMA_READ: /* * iWarp restricts RREAD sink to SGL containing * 1 SGE only. we could relax to SGL with multiple * elements referring the SAME ltag or even sending * a private per-rreq tag referring to a checked * local sgl with MULTIPLE ltag's. */ if (unlikely(wr->num_sge != 1)) { rv = -EINVAL; break; } siw_copy_sgl(wr->sg_list, &sqe->sge[0], 1); /* * NOTE: zero length RREAD is allowed! 
*/ sqe->raddr = rdma_wr(wr)->remote_addr; sqe->rkey = rdma_wr(wr)->rkey; sqe->num_sge = 1; if (wr->opcode == IB_WR_RDMA_READ) sqe->opcode = SIW_OP_READ; else sqe->opcode = SIW_OP_READ_LOCAL_INV; break; case IB_WR_RDMA_WRITE: if (!(wr->send_flags & IB_SEND_INLINE)) { siw_copy_sgl(wr->sg_list, &sqe->sge[0], wr->num_sge); sqe->num_sge = wr->num_sge; } else { rv = siw_copy_inline_sgl(wr, sqe); if (unlikely(rv < 0)) { rv = -EINVAL; break; } sqe->flags |= SIW_WQE_INLINE; sqe->num_sge = 1; } sqe->raddr = rdma_wr(wr)->remote_addr; sqe->rkey = rdma_wr(wr)->rkey; sqe->opcode = SIW_OP_WRITE; break; case IB_WR_REG_MR: sqe->base_mr = (uintptr_t)reg_wr(wr)->mr; sqe->rkey = reg_wr(wr)->key; sqe->access = reg_wr(wr)->access & IWARP_ACCESS_MASK; sqe->opcode = SIW_OP_REG_MR; break; case IB_WR_LOCAL_INV: sqe->rkey = wr->ex.invalidate_rkey; sqe->opcode = SIW_OP_INVAL_STAG; break; default: siw_dbg_qp(qp, "ib wr type %d unsupported\n", wr->opcode); rv = -EINVAL; break; } siw_dbg_qp(qp, "opcode %d, flags 0x%x, wr_id 0x%pK\n", sqe->opcode, sqe->flags, (void *)(uintptr_t)sqe->id); if (unlikely(rv < 0)) break; /* make SQE only valid after completely written */ smp_wmb(); sqe->flags |= SIW_WQE_VALID; qp->sq_put++; wr = wr->next; } /* * Send directly if SQ processing is not in progress. * Eventual immediate errors (rv < 0) do not affect the involved * RI resources (Verbs, 8.3.1) and thus do not prevent from SQ * processing, if new work is already pending. But rv must be passed * to caller. */ if (wqe->wr_status != SIW_WR_IDLE) { spin_unlock_irqrestore(&qp->sq_lock, flags); goto skip_direct_sending; } rv = siw_activate_tx(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); if (rv <= 0) goto skip_direct_sending; if (rdma_is_kernel_res(&qp->base_qp.res)) { rv = siw_sq_start(qp); } else { qp->tx_ctx.in_syscall = 1; if (siw_qp_sq_process(qp) != 0 && !(qp->tx_ctx.tx_suspend)) siw_qp_cm_drop(qp, 0); qp->tx_ctx.in_syscall = 0; } skip_direct_sending: up_read(&qp->state_lock); if (rv >= 0) return 0; /* * Immediate error */ siw_dbg_qp(qp, "error %d\n", rv); *bad_wr = wr; return rv; } /* * siw_post_receive() * * Post a list of R-WR's to a RQ. * * @base_qp: Base QP contained in siw QP * @wr: Null terminated list of user WR's * @bad_wr: Points to failing WR in case of synchronous failure. */ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_qp *qp = to_siw_qp(base_qp); unsigned long flags; int rv = 0; if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); *bad_wr = wr; return -EINVAL; } /* * Try to acquire QP state lock. Must be non-blocking * to accommodate kernel clients needs. */ if (!down_read_trylock(&qp->state_lock)) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * ERROR state is final, so we can be sure * this state will not change as long as the QP * exists. * * This handles an ib_drain_rq() call with * a concurrent request to set the QP state * to ERROR. */ rv = siw_rq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP locked, state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } return rv; } if (qp->attrs.state > SIW_QP_STATE_RTS) { if (qp->attrs.state == SIW_QP_STATE_ERROR) { /* * Immediately flush this WR to CQ, if QP * is in ERROR state. RQ is guaranteed to * be empty, so WR complets in-order. * * Typically triggered by ib_drain_rq(). 
*/ rv = siw_rq_flush_wr(qp, wr, bad_wr); } else { siw_dbg_qp(qp, "QP out of state %d\n", qp->attrs.state); *bad_wr = wr; rv = -ENOTCONN; } up_read(&qp->state_lock); return rv; } /* * Serialize potentially multiple producers. * Not needed for single threaded consumer side. */ spin_lock_irqsave(&qp->rq_lock, flags); while (wr) { u32 idx = qp->rq_put % qp->attrs.rq_size; struct siw_rqe *rqe = &qp->recvq[idx]; if (rqe->flags) { siw_dbg_qp(qp, "RQ full\n"); rv = -ENOMEM; break; } if (wr->num_sge > qp->attrs.rq_max_sges) { siw_dbg_qp(qp, "too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } rqe->id = wr->wr_id; rqe->num_sge = wr->num_sge; siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); /* make sure RQE is completely written before valid */ smp_wmb(); rqe->flags = SIW_WQE_VALID; qp->rq_put++; wr = wr->next; } spin_unlock_irqrestore(&qp->rq_lock, flags); up_read(&qp->state_lock); if (rv < 0) { siw_dbg_qp(qp, "error %d\n", rv); *bad_wr = wr; } return rv > 0 ? 0 : rv; } int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) { struct siw_cq *cq = to_siw_cq(base_cq); struct siw_device *sdev = to_siw_dev(base_cq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); siw_dbg_cq(cq, "free CQ resources\n"); siw_cq_flush(cq); if (ctx) rdma_user_mmap_entry_remove(cq->cq_entry); atomic_dec(&sdev->num_cq); vfree(cq->queue); return 0; } /* * siw_create_cq() * * Populate CQ of requested size * * @base_cq: CQ as allocated by RDMA midlayer * @attr: Initial CQ attributes * @attrs: uverbs bundle */ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr, struct uverbs_attr_bundle *attrs) { struct ib_udata *udata = &attrs->driver_udata; struct siw_device *sdev = to_siw_dev(base_cq->device); struct siw_cq *cq = to_siw_cq(base_cq); int rv, size = attr->cqe; if (attr->flags) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_cq) > SIW_MAX_CQ) { siw_dbg(base_cq->device, "too many CQ's\n"); rv = -ENOMEM; goto err_out; } if (size < 1 || size > sdev->attrs.max_cqe) { siw_dbg(base_cq->device, "CQ size error: %d\n", size); rv = -EINVAL; goto err_out; } size = roundup_pow_of_two(size); cq->base_cq.cqe = size; cq->num_cqe = size; if (udata) cq->queue = vmalloc_user(size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl)); else cq->queue = vzalloc(size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl)); if (cq->queue == NULL) { rv = -ENOMEM; goto err_out; } get_random_bytes(&cq->id, 4); siw_dbg(base_cq->device, "new CQ [%u]\n", cq->id); spin_lock_init(&cq->lock); cq->notify = (struct siw_cq_ctrl *)&cq->queue[size]; if (udata) { struct siw_uresp_create_cq uresp = {}; struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); size_t length = size * sizeof(struct siw_cqe) + sizeof(struct siw_cq_ctrl); cq->cq_entry = siw_mmap_entry_insert(ctx, cq->queue, length, &uresp.cq_key); if (!cq->cq_entry) { rv = -ENOMEM; goto err_out; } uresp.cq_id = cq->id; uresp.num_cqe = size; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } return 0; err_out: siw_dbg(base_cq->device, "CQ creation failed: %d", rv); if (cq->queue) { struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); if (ctx) rdma_user_mmap_entry_remove(cq->cq_entry); vfree(cq->queue); } atomic_dec(&sdev->num_cq); return rv; } /* * siw_poll_cq() * * Reap CQ entries if available and copy work completion status into 
* array of WC's provided by caller. Returns number of reaped CQE's. * * @base_cq: Base CQ contained in siw CQ. * @num_cqe: Maximum number of CQE's to reap. * @wc: Array of work completions to be filled by siw. */ int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc) { struct siw_cq *cq = to_siw_cq(base_cq); int i; for (i = 0; i < num_cqe; i++) { if (!siw_reap_cqe(cq, wc)) break; wc++; } return i; } /* * siw_req_notify_cq() * * Request notification for new CQE's added to that CQ. * Defined flags: * o SIW_CQ_NOTIFY_SOLICITED lets siw trigger a notification * event if a WQE with notification flag set enters the CQ * o SIW_CQ_NOTIFY_NEXT_COMP lets siw trigger a notification * event if a WQE enters the CQ. * o IB_CQ_REPORT_MISSED_EVENTS: return value will provide the * number of not reaped CQE's regardless of its notification * type and current or new CQ notification settings. * * @base_cq: Base CQ contained in siw CQ. * @flags: Requested notification flags. */ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags) { struct siw_cq *cq = to_siw_cq(base_cq); siw_dbg_cq(cq, "flags: 0x%02x\n", flags); if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) /* * Enable CQ event for next solicited completion. * and make it visible to all associated producers. */ smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED); else /* * Enable CQ event for any signalled completion. * and make it visible to all associated producers. */ smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL); if (flags & IB_CQ_REPORT_MISSED_EVENTS) return cq->cq_put - cq->cq_get; return 0; } /* * siw_dereg_mr() * * Release Memory Region. * * @base_mr: Base MR contained in siw MR. * @udata: points to user context, unused. */ int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata) { struct siw_mr *mr = to_siw_mr(base_mr); struct siw_device *sdev = to_siw_dev(base_mr->device); siw_dbg_mem(mr->mem, "deregister MR\n"); atomic_dec(&sdev->num_mr); siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); return 0; } /* * siw_reg_user_mr() * * Register Memory Region. * * @pd: Protection Domain * @start: starting address of MR (virtual address) * @len: len of MR * @rnic_va: not used by siw * @rights: MR access rights * @udata: user buffer to communicate STag and Key. 
*/ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 rnic_va, int rights, struct ib_udata *udata) { struct siw_mr *mr = NULL; struct siw_umem *umem = NULL; struct siw_ureq_reg_mr ureq; struct siw_device *sdev = to_siw_dev(pd->device); int rv; siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", (void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va, (unsigned long long)len); if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } if (!len) { rv = -EINVAL; goto err_out; } umem = siw_umem_get(pd->device, start, len, rights); if (IS_ERR(umem)) { rv = PTR_ERR(umem); siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); umem = NULL; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, umem, start, len, rights); if (rv) goto err_out; if (udata) { struct siw_uresp_reg_mr uresp = {}; struct siw_mem *mem = mr->mem; if (udata->inlen < sizeof(ureq)) { rv = -EINVAL; goto err_out; } rv = ib_copy_from_udata(&ureq, udata, sizeof(ureq)); if (rv) goto err_out; mr->base_mr.lkey |= ureq.stag_key; mr->base_mr.rkey |= ureq.stag_key; mem->stag |= ureq.stag_key; uresp.stag = mem->stag; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } mr->mem->stag_valid = 1; return &mr->base_mr; err_out: atomic_dec(&sdev->num_mr); if (mr) { if (mr->mem) siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); } else { if (umem) siw_umem_release(umem); } return ERR_PTR(rv); } struct ib_mr *siw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_sge) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; struct siw_pbl *pbl = NULL; int rv; if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } if (mr_type != IB_MR_TYPE_MEM_REG) { siw_dbg_pd(pd, "mr type %d unsupported\n", mr_type); rv = -EOPNOTSUPP; goto err_out; } if (max_sge > SIW_MAX_SGE_PBL) { siw_dbg_pd(pd, "too many sge's: %d\n", max_sge); rv = -ENOMEM; goto err_out; } pbl = siw_pbl_alloc(max_sge); if (IS_ERR(pbl)) { rv = PTR_ERR(pbl); siw_dbg_pd(pd, "pbl allocation failed: %d\n", rv); pbl = NULL; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, pbl, 0, max_sge * PAGE_SIZE, 0); if (rv) goto err_out; mr->mem->is_pbl = 1; siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); return &mr->base_mr; err_out: atomic_dec(&sdev->num_mr); if (!mr) { kfree(pbl); } else { if (mr->mem) siw_mr_drop_mem(mr); kfree_rcu(mr, rcu); } siw_dbg_pd(pd, "failed: %d\n", rv); return ERR_PTR(rv); } /* Just used to count number of pages being mapped */ static int siw_set_pbl_page(struct ib_mr *base_mr, u64 buf_addr) { return 0; } int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, unsigned int *sg_off) { struct scatterlist *slp; struct siw_mr *mr = to_siw_mr(base_mr); struct siw_mem *mem = mr->mem; struct siw_pbl *pbl = mem->pbl; struct siw_pble *pble; unsigned long pbl_size; int i, rv; if (!pbl) { siw_dbg_mem(mem, "no PBL allocated\n"); return -EINVAL; } pble = pbl->pbe; if (pbl->max_buf < num_sle) { siw_dbg_mem(mem, "too many SGE's: %d > %d\n", num_sle, pbl->max_buf); return -ENOMEM; } for_each_sg(sl, slp, num_sle, i) { if (sg_dma_len(slp) == 0) { siw_dbg_mem(mem, "empty SGE\n"); return -EINVAL; } if (i == 0) { pble->addr = sg_dma_address(slp); pble->size = sg_dma_len(slp); 
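/* The first SG element seeds the PBL; pbl_off records each pble's byte offset within the mapped region, and physically adjacent elements are merged in the else branch below. */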
pble->pbl_off = 0; pbl_size = pble->size; pbl->num_buf = 1; } else { /* Merge PBL entries if adjacent */ if (pble->addr + pble->size == sg_dma_address(slp)) { pble->size += sg_dma_len(slp); } else { pble++; pbl->num_buf++; pble->addr = sg_dma_address(slp); pble->size = sg_dma_len(slp); pble->pbl_off = pbl_size; } pbl_size += sg_dma_len(slp); } siw_dbg_mem(mem, "sge[%d], size %u, addr 0x%p, total %lu\n", i, pble->size, ib_virt_dma_to_ptr(pble->addr), pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); if (rv > 0) { mem->len = base_mr->length; mem->va = base_mr->iova; siw_dbg_mem(mem, "%llu bytes, start 0x%pK, %u SLE to %u entries\n", mem->len, (void *)(uintptr_t)mem->va, num_sle, pbl->num_buf); } return rv; } /* * siw_get_dma_mr() * * Create a (empty) DMA memory region, where no umem is attached. */ struct ib_mr *siw_get_dma_mr(struct ib_pd *pd, int rights) { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mr *mr = NULL; int rv; if (atomic_inc_return(&sdev->num_mr) > SIW_MAX_MR) { siw_dbg_pd(pd, "too many mr's\n"); rv = -ENOMEM; goto err_out; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { rv = -ENOMEM; goto err_out; } rv = siw_mr_add_mem(mr, pd, NULL, 0, ULONG_MAX, rights); if (rv) goto err_out; mr->mem->stag_valid = 1; siw_dbg_pd(pd, "[MEM %u]: success\n", mr->mem->stag); return &mr->base_mr; err_out: if (rv) kfree(mr); atomic_dec(&sdev->num_mr); return ERR_PTR(rv); } /* * siw_create_srq() * * Create Shared Receive Queue of attributes @init_attrs * within protection domain given by @pd. * * @base_srq: Base SRQ contained in siw SRQ. * @init_attrs: SRQ init attributes. * @udata: points to user context */ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *init_attrs, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct ib_srq_attr *attrs = &init_attrs->attr; struct siw_device *sdev = to_siw_dev(base_srq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); int rv; if (init_attrs->srq_type != IB_SRQT_BASIC) return -EOPNOTSUPP; if (atomic_inc_return(&sdev->num_srq) > SIW_MAX_SRQ) { siw_dbg_pd(base_srq->pd, "too many SRQ's\n"); rv = -ENOMEM; goto err_out; } if (attrs->max_wr == 0 || attrs->max_wr > SIW_MAX_SRQ_WR || attrs->max_sge > SIW_MAX_SGE || attrs->srq_limit > attrs->max_wr) { rv = -EINVAL; goto err_out; } srq->max_sge = attrs->max_sge; srq->num_rqe = roundup_pow_of_two(attrs->max_wr); srq->limit = attrs->srq_limit; if (srq->limit) srq->armed = true; srq->is_kernel_res = !udata; if (udata) srq->recvq = vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); else srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe)); if (srq->recvq == NULL) { rv = -ENOMEM; goto err_out; } if (udata) { struct siw_uresp_create_srq uresp = {}; size_t length = srq->num_rqe * sizeof(struct siw_rqe); srq->srq_entry = siw_mmap_entry_insert(ctx, srq->recvq, length, &uresp.srq_key); if (!srq->srq_entry) { rv = -ENOMEM; goto err_out; } uresp.num_rqe = srq->num_rqe; if (udata->outlen < sizeof(uresp)) { rv = -EINVAL; goto err_out; } rv = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rv) goto err_out; } spin_lock_init(&srq->lock); siw_dbg_pd(base_srq->pd, "[SRQ]: success\n"); return 0; err_out: if (srq->recvq) { if (ctx) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); } atomic_dec(&sdev->num_srq); return rv; } /* * siw_modify_srq() * * Modify SRQ. 
The caller may resize SRQ and/or set/reset notification * limit and (re)arm IB_EVENT_SRQ_LIMIT_REACHED notification. * * NOTE: it is unclear if RDMA core allows for changing the MAX_SGE * parameter. siw_modify_srq() does not check the attrs->max_sge param. */ int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; int rv = 0; spin_lock_irqsave(&srq->lock, flags); if (attr_mask & IB_SRQ_MAX_WR) { /* resize request not yet supported */ rv = -EOPNOTSUPP; goto out; } if (attr_mask & IB_SRQ_LIMIT) { if (attrs->srq_limit) { if (unlikely(attrs->srq_limit > srq->num_rqe)) { rv = -EINVAL; goto out; } srq->armed = true; } else { srq->armed = false; } srq->limit = attrs->srq_limit; } out: spin_unlock_irqrestore(&srq->lock, flags); return rv; } /* * siw_query_srq() * * Query SRQ attributes. */ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; spin_lock_irqsave(&srq->lock, flags); attrs->max_wr = srq->num_rqe; attrs->max_sge = srq->max_sge; attrs->srq_limit = srq->limit; spin_unlock_irqrestore(&srq->lock, flags); return 0; } /* * siw_destroy_srq() * * Destroy SRQ. * It is assumed that the SRQ is not referenced by any * QP anymore - the code trusts the RDMA core environment to keep track * of QP references. */ int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct siw_device *sdev = to_siw_dev(base_srq->device); struct siw_ucontext *ctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); if (ctx) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); return 0; } /* * siw_post_srq_recv() * * Post a list of receive queue elements to SRQ. * NOTE: The function does not check or lock a certain SRQ state * during the post operation. The code simply trusts the * RDMA core environment. * * @base_srq: Base SRQ contained in siw SRQ * @wr: List of R-WR's * @bad_wr: Updated to failing WR if posting fails. */ int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct siw_srq *srq = to_siw_srq(base_srq); unsigned long flags; int rv = 0; if (unlikely(!srq->is_kernel_res)) { siw_dbg_pd(base_srq->pd, "[SRQ]: no kernel post_recv for mapped srq\n"); rv = -EINVAL; goto out; } /* * Serialize potentially multiple producers. * Also needed to serialize potentially multiple * consumers. 
*/ spin_lock_irqsave(&srq->lock, flags); while (wr) { u32 idx = srq->rq_put % srq->num_rqe; struct siw_rqe *rqe = &srq->recvq[idx]; if (rqe->flags) { siw_dbg_pd(base_srq->pd, "SRQ full\n"); rv = -ENOMEM; break; } if (unlikely(wr->num_sge > srq->max_sge)) { siw_dbg_pd(base_srq->pd, "[SRQ]: too many sge's: %d\n", wr->num_sge); rv = -EINVAL; break; } rqe->id = wr->wr_id; rqe->num_sge = wr->num_sge; siw_copy_sgl(wr->sg_list, rqe->sge, wr->num_sge); /* Make sure S-RQE is completely written before valid */ smp_wmb(); rqe->flags = SIW_WQE_VALID; srq->rq_put++; wr = wr->next; } spin_unlock_irqrestore(&srq->lock, flags); out: if (unlikely(rv < 0)) { siw_dbg_pd(base_srq->pd, "[SRQ]: error %d\n", rv); *bad_wr = wr; } return rv; } void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) { struct ib_event event; struct ib_qp *base_qp = &qp->base_qp; /* * Do not report asynchronous errors on QP which gets * destroyed via verbs interface (siw_destroy_qp()) */ if (qp->attrs.flags & SIW_QP_IN_DESTROY) return; event.event = etype; event.device = base_qp->device; event.element.qp = base_qp; if (base_qp->event_handler) { siw_dbg_qp(qp, "reporting event %d\n", etype); base_qp->event_handler(&event, base_qp->qp_context); } } void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) { struct ib_event event; struct ib_cq *base_cq = &cq->base_cq; event.event = etype; event.device = base_cq->device; event.element.cq = base_cq; if (base_cq->event_handler) { siw_dbg_cq(cq, "reporting CQ event %d\n", etype); base_cq->event_handler(&event, base_cq->cq_context); } } void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) { struct ib_event event; struct ib_srq *base_srq = &srq->base_srq; event.event = etype; event.device = base_srq->device; event.element.srq = base_srq; if (base_srq->event_handler) { siw_dbg_pd(srq->base_srq.pd, "reporting SRQ event %d\n", etype); base_srq->event_handler(&event, base_srq->srq_context); } } void siw_port_event(struct siw_device *sdev, u32 port, enum ib_event_type etype) { struct ib_event event; event.event = etype; event.device = &sdev->base_dev; event.element.port_num = port; siw_dbg(&sdev->base_dev, "reporting port event %d\n", etype); ib_dispatch_event(&event); } |
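/*
 * Editor's note: a minimal, hypothetical consumer-side sketch (not part of
 * siw) of how the poll/notify semantics implemented above are normally driven
 * through the RDMA core wrappers ib_poll_cq() and ib_req_notify_cq(), which
 * dispatch to siw_poll_cq() and siw_req_notify_cq() for a siw device.  The
 * handler name and batch size are illustrative assumptions only.
 */
#include <rdma/ib_verbs.h>

#define EXAMPLE_CQ_BATCH 16

static void example_cq_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc[EXAMPLE_CQ_BATCH];
	int n, i;

	do {
		/* drain all currently available completions */
		while ((n = ib_poll_cq(cq, EXAMPLE_CQ_BATCH, wc)) > 0) {
			for (i = 0; i < n; i++)
				pr_debug("wr_id %llu completed, status %d\n",
					 wc[i].wr_id, wc[i].status);
		}
		/*
		 * Re-arm the CQ.  With IB_CQ_REPORT_MISSED_EVENTS a positive
		 * return value means CQEs arrived between draining and
		 * re-arming, so loop and poll again instead of waiting for
		 * the next completion event.
		 */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS) > 0);
}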
6 6 1 1 1 3 6 6 6 6 6 5 5 4 6 3 2 6 7 1 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 | // SPDX-License-Identifier: GPL-2.0-or-later /* * spca1528 subdriver * * Copyright (C) 2010-2011 Jean-Francois Moine (http://moinejf.free.fr) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca1528" #include "gspca.h" #include "jpeg.h" MODULE_AUTHOR("Jean-Francois Moine <http://moinejf.free.fr>"); MODULE_DESCRIPTION("SPCA1528 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 pkt_seq; u8 jpeg_hdr[JPEG_HDR_SZ]; }; static const struct v4l2_pix_format vga_mode[] = { /* (does not work correctly) {176, 144, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 5 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 3}, */ {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 4 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 2}, {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 8 + 590, .colorspace = V4L2_COLORSPACE_JPEG, .priv = 1}, }; /* read <len> bytes to gspca usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, u8 req, u16 index, int len) { #if USB_BUF_SZ < 64 #error "USB buffer too small" #endif struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, /* value */ index, gspca_dev->usb_buf, len, 500); gspca_dbg(gspca_dev, D_USBI, "GET %02x 0000 %04x %02x\n", req, index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_r err %d\n", ret); gspca_dev->usb_err = ret; /* * Make sure the buffer is zeroed to avoid uninitialized * values. 
*/ memset(gspca_dev->usb_buf, 0, USB_BUF_SZ); } } static void reg_w(struct gspca_dev *gspca_dev, u8 req, u16 value, u16 index) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "SET %02x %04x %04x\n", req, value, index); ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); if (ret < 0) { pr_err("reg_w err %d\n", ret); gspca_dev->usb_err = ret; } } static void reg_wb(struct gspca_dev *gspca_dev, u8 req, u16 value, u16 index, u8 byte) { struct usb_device *dev = gspca_dev->dev; int ret; if (gspca_dev->usb_err < 0) return; gspca_dbg(gspca_dev, D_USBO, "SET %02x %04x %04x %02x\n", req, value, index, byte); gspca_dev->usb_buf[0] = byte; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, gspca_dev->usb_buf, 1, 500); if (ret < 0) { pr_err("reg_w err %d\n", ret); gspca_dev->usb_err = ret; } } static void wait_status_0(struct gspca_dev *gspca_dev) { int i, w; i = 16; w = 0; do { reg_r(gspca_dev, 0x21, 0x0000, 1); if (gspca_dev->usb_buf[0] == 0) return; w += 15; msleep(w); } while (--i > 0); gspca_err(gspca_dev, "wait_status_0 timeout\n"); gspca_dev->usb_err = -ETIME; } static void wait_status_1(struct gspca_dev *gspca_dev) { int i; i = 10; do { reg_r(gspca_dev, 0x21, 0x0001, 1); msleep(10); if (gspca_dev->usb_buf[0] == 1) { reg_wb(gspca_dev, 0x21, 0x0000, 0x0001, 0x00); reg_r(gspca_dev, 0x21, 0x0001, 1); return; } } while (--i > 0); gspca_err(gspca_dev, "wait_status_1 timeout\n"); gspca_dev->usb_err = -ETIME; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc0, 0x0000, 0x00c0, val); } static void setcontrast(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc1, 0x0000, 0x00c1, val); } static void sethue(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc2, 0x0000, 0x0000, val); } static void setcolor(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc3, 0x0000, 0x00c3, val); } static void setsharpness(struct gspca_dev *gspca_dev, s32 val) { reg_wb(gspca_dev, 0xc4, 0x0000, 0x00c4, val); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { gspca_dev->cam.cam_mode = vga_mode; gspca_dev->cam.nmodes = ARRAY_SIZE(vga_mode); gspca_dev->cam.npkt = 128; /* number of packets per ISOC message */ /*fixme: 256 in ms-win traces*/ return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { reg_w(gspca_dev, 0x00, 0x0001, 0x2067); reg_w(gspca_dev, 0x00, 0x00d0, 0x206b); reg_w(gspca_dev, 0x00, 0x0000, 0x206c); reg_w(gspca_dev, 0x00, 0x0001, 0x2069); msleep(8); reg_w(gspca_dev, 0x00, 0x00c0, 0x206b); reg_w(gspca_dev, 0x00, 0x0000, 0x206c); reg_w(gspca_dev, 0x00, 0x0001, 0x2069); reg_r(gspca_dev, 0x20, 0x0000, 1); reg_r(gspca_dev, 0x20, 0x0000, 5); reg_r(gspca_dev, 0x23, 0x0000, 64); gspca_dbg(gspca_dev, D_PROBE, "%s%s\n", &gspca_dev->usb_buf[0x1c], &gspca_dev->usb_buf[0x30]); reg_r(gspca_dev, 0x23, 0x0001, 64); return gspca_dev->usb_err; } /* function called at start time before URB creation */ static int sd_isoc_init(struct gspca_dev *gspca_dev) { u8 mode; reg_r(gspca_dev, 0x00, 0x2520, 1); wait_status_0(gspca_dev); reg_w(gspca_dev, 0xc5, 0x0003, 0x0000); wait_status_1(gspca_dev); wait_status_0(gspca_dev); mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_wb(gspca_dev, 0x25, 0x0000, 
0x0004, mode); reg_r(gspca_dev, 0x25, 0x0004, 1); reg_wb(gspca_dev, 0x27, 0x0000, 0x0000, 0x06); /* 420 */ reg_r(gspca_dev, 0x27, 0x0000, 1); /* not useful.. gspca_dev->alt = 4; * use alternate setting 3 */ return gspca_dev->usb_err; } /* -- start the camera -- */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; /* initialize the JPEG header */ jpeg_define(sd->jpeg_hdr, gspca_dev->pixfmt.height, gspca_dev->pixfmt.width, 0x22); /* JPEG 411 */ /* the JPEG quality shall be 85% */ jpeg_set_qual(sd->jpeg_hdr, 85); reg_r(gspca_dev, 0x00, 0x2520, 1); msleep(8); /* start the capture */ wait_status_0(gspca_dev); reg_w(gspca_dev, 0x31, 0x0000, 0x0004); /* start request */ wait_status_1(gspca_dev); wait_status_0(gspca_dev); msleep(200); sd->pkt_seq = 0; return gspca_dev->usb_err; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* stop the capture */ wait_status_0(gspca_dev); reg_w(gspca_dev, 0x31, 0x0000, 0x0000); /* stop request */ wait_status_1(gspca_dev); wait_status_0(gspca_dev); } /* move a packet adding 0x00 after 0xff */ static void add_packet(struct gspca_dev *gspca_dev, u8 *data, int len) { int i; i = 0; do { if (data[i] == 0xff) { gspca_frame_add(gspca_dev, INTER_PACKET, data, i + 1); len -= i; data += i; *data = 0x00; i = 0; } } while (++i < len); gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; static const u8 ffd9[] = {0xff, 0xd9}; /* image packets start with: * 02 8n * with <n> bit: * 0x01: even (0) / odd (1) image * 0x02: end of image when set */ if (len < 3) return; /* empty packet */ if (*data == 0x02) { if (data[1] & 0x02) { sd->pkt_seq = !(data[1] & 1); add_packet(gspca_dev, data + 2, len - 2); gspca_frame_add(gspca_dev, LAST_PACKET, ffd9, 2); return; } if ((data[1] & 1) != sd->pkt_seq) goto err; if (gspca_dev->last_packet_type == LAST_PACKET) gspca_frame_add(gspca_dev, FIRST_PACKET, sd->jpeg_hdr, JPEG_HDR_SZ); add_packet(gspca_dev, data + 2, len - 2); return; } err: gspca_dev->last_packet_type = DISCARD_PACKET; } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: setcontrast(gspca_dev, ctrl->val); break; case V4L2_CID_HUE: sethue(gspca_dev, ctrl->val); break; case V4L2_CID_SATURATION: setcolor(gspca_dev, ctrl->val); break; case V4L2_CID_SHARPNESS: setsharpness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 8, 1, 1); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 0, 255, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SATURATION, 0, 8, 1, 1); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_SHARPNESS, 0, 255, 1, 0); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = 
sd_config, .init = sd_init, .init_controls = sd_init_controls, .isoc_init = sd_isoc_init, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x04fc, 0x1528)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { /* the video interface for isochronous transfer is 1 */ if (intf->cur_altsetting->desc.bInterfaceNumber != 1) return -ENODEV; return gspca_dev_probe2(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); |
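/*
 * Editor's note: add_packet() above performs standard JPEG byte stuffing in
 * place: every 0xff byte in the entropy-coded data must be followed by a
 * 0x00 so it is not mistaken for a marker.  Below is a hypothetical, more
 * literal version using a separate output buffer, included purely to clarify
 * the transformation; it is not part of the driver, and the caller must
 * provide a destination of up to twice the input length.
 */
static size_t example_jpeg_stuff(const u8 *src, size_t len, u8 *dst)
{
	size_t i, out = 0;

	for (i = 0; i < len; i++) {
		dst[out++] = src[i];
		if (src[i] == 0xff)
			dst[out++] = 0x00;	/* escape the marker prefix */
	}
	return out;				/* number of bytes written */
}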
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. * Copyright (C) 2023 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <net/mac80211.h> #include "wme.h" #include "ieee80211_i.h" #include "mesh.h" #include <linux/rhashtable.h> static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { .nelem_hint = 2, .automatic_shrinking = true, .key_len = ETH_ALEN, .key_offset = offsetof(struct mesh_path, dst), .head_offset = offsetof(struct mesh_path, rhash), .hashfn = mesh_table_hash, }; static const struct rhashtable_params fast_tx_rht_params = { .nelem_hint = 10, .automatic_shrinking = true, .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key), .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key), .head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash), .hashfn = mesh_table_hash, }; static void __mesh_fast_tx_entry_free(void *ptr, void *tblptr) { struct ieee80211_mesh_fast_tx *entry = ptr; kfree_rcu(entry, fast_tx.rcu_head); } static void mesh_fast_tx_deinit(struct ieee80211_sub_if_data *sdata) { struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; rhashtable_free_and_destroy(&cache->rht, __mesh_fast_tx_entry_free, NULL); } static void mesh_fast_tx_init(struct ieee80211_sub_if_data *sdata) { struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; rhashtable_init(&cache->rht, &fast_tx_rht_params); INIT_HLIST_HEAD(&cache->walk_head); spin_lock_init(&cache->walk_lock); } static inline bool mpath_expired(struct mesh_path *mpath) { return (mpath->flags & MESH_PATH_ACTIVE) && time_after(jiffies, mpath->exp_time) && !(mpath->flags & MESH_PATH_FIXED); } static void mesh_path_rht_free(void *ptr, void *tblptr) { struct mesh_path *mpath = ptr; struct mesh_table *tbl = tblptr; mesh_path_free_rcu(tbl, mpath); } static void mesh_table_init(struct mesh_table *tbl) { INIT_HLIST_HEAD(&tbl->known_gates); INIT_HLIST_HEAD(&tbl->walk_head); atomic_set(&tbl->entries, 0); spin_lock_init(&tbl->gates_lock); spin_lock_init(&tbl->walk_lock); /* rhashtable_init() may fail only in case of wrong * mesh_rht_params */ WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params)); } static void mesh_table_free(struct mesh_table *tbl) { rhashtable_free_and_destroy(&tbl->rhead, 
mesh_path_rht_free, tbl); } /** * mesh_path_assign_nexthop - update mesh path next hop * * @mpath: mesh path to update * @sta: next hop to assign * * Locking: mpath->state_lock must be held when calling this function */ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_hdr *hdr; unsigned long flags; rcu_assign_pointer(mpath->next_hop, sta); spin_lock_irqsave(&mpath->frame_queue.lock, flags); skb_queue_walk(&mpath->frame_queue, skb) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, struct mesh_path *gate_mpath) { struct ieee80211_hdr *hdr; struct ieee80211s_hdr *mshdr; int mesh_hdrlen, hdrlen; char *next_hop; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); if (!(mshdr->flags & MESH_FLAGS_AE)) { /* size of the fixed part of the mesh header */ mesh_hdrlen = 6; /* make room for the two extended addresses */ skb_push(skb, 2 * ETH_ALEN); memmove(skb->data, hdr, hdrlen + mesh_hdrlen); hdr = (struct ieee80211_hdr *) skb->data; /* we preserve the previous mesh header and only add * the new addresses */ mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); mshdr->flags = MESH_FLAGS_AE_A5_A6; memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN); memcpy(mshdr->eaddr2, hdr->addr4, ETH_ALEN); } /* update next hop */ hdr = (struct ieee80211_hdr *) skb->data; rcu_read_lock(); next_hop = rcu_dereference(gate_mpath->next_hop)->sta.addr; memcpy(hdr->addr1, next_hop, ETH_ALEN); rcu_read_unlock(); memcpy(hdr->addr2, gate_mpath->sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, dst_addr, ETH_ALEN); } /** * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) * @from_mpath: The failed mpath * @copy: When true, copy all the frames to the new mpath queue. When false, * move them. * * This function is used to transfer or copy frames from an unresolved mpath to * a gate mpath. The function also adds the Address Extension field and * updates the next hop. * * If a frame already has an Address Extension field, only the next hop and * destination addresses are updated. * * The gate mpath must be an active mpath with a valid mpath->next_hop. 
*/ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, bool copy) { struct sk_buff *skb, *fskb, *tmp; struct sk_buff_head failq; unsigned long flags; if (WARN_ON(gate_mpath == from_mpath)) return; if (WARN_ON(!gate_mpath->next_hop)) return; __skb_queue_head_init(&failq); spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice_init(&from_mpath->frame_queue, &failq); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); skb_queue_walk_safe(&failq, fskb, tmp) { if (skb_queue_len(&gate_mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) { mpath_dbg(gate_mpath->sdata, "mpath queue full!\n"); break; } skb = skb_copy(fskb, GFP_ATOMIC); if (WARN_ON(!skb)) break; prepare_for_gate(skb, gate_mpath->dst, gate_mpath); skb_queue_tail(&gate_mpath->frame_queue, skb); if (copy) continue; __skb_unlink(fskb, &failq); kfree_skb(fskb); } mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); if (!copy) return; spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice(&failq, &from_mpath->frame_queue); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); } static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params); if (mpath && mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; } /** * mesh_path_lookup - look up a path in the mesh path table * @sdata: local subif * @dst: hardware address (ETH_ALEN length) of destination * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata); } static struct mesh_path * __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) { int i = 0; struct mesh_path *mpath; hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (i++ == idx) break; } if (!mpath) return NULL; if (mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; } /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index * @sdata: local subif, or NULL for all entries * @idx: index * * Returns: pointer to the mesh path structure, or NULL if not found. * * Locking: must be called within a read rcu section. */ struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx); } /** * mpp_path_lookup_by_idx - look up a path in the proxy path table by its index * @sdata: local subif, or NULL for all entries * @idx: index * * Returns: pointer to the proxy path structure, or NULL if not found. * * Locking: must be called within a read rcu section. 
*/ struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx); } /** * mesh_path_add_gate - add the given mpath to a mesh gate to our path table * @mpath: gate path to add to table * * Returns: 0 on success, -EEXIST */ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; int err; rcu_read_lock(); tbl = &mpath->sdata->u.mesh.mesh_paths; spin_lock_bh(&mpath->state_lock); if (mpath->is_gate) { err = -EEXIST; spin_unlock_bh(&mpath->state_lock); goto err_rcu; } mpath->is_gate = true; mpath->sdata->u.mesh.num_gates++; spin_lock(&tbl->gates_lock); hlist_add_head_rcu(&mpath->gate_list, &tbl->known_gates); spin_unlock(&tbl->gates_lock); spin_unlock_bh(&mpath->state_lock); mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); err = 0; err_rcu: rcu_read_unlock(); return err; } /** * mesh_gate_del - remove a mesh gate from the list of known gates * @tbl: table which holds our list of known gates * @mpath: gate mpath */ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { lockdep_assert_held(&mpath->state_lock); if (!mpath->is_gate) return; mpath->is_gate = false; spin_lock_bh(&tbl->gates_lock); hlist_del_rcu(&mpath->gate_list); mpath->sdata->u.mesh.num_gates--; spin_unlock_bh(&tbl->gates_lock); mpath_dbg(mpath->sdata, "Mesh path: Deleted gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); } /** * mesh_gate_num - number of gates known to this interface * @sdata: subif data * * Returns: The number of gates */ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) { return sdata->u.mesh.num_gates; } static struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, const u8 *dst, gfp_t gfp_flags) { struct mesh_path *new_mpath; new_mpath = kzalloc(sizeof(struct mesh_path), gfp_flags); if (!new_mpath) return NULL; memcpy(new_mpath->dst, dst, ETH_ALEN); eth_broadcast_addr(new_mpath->rann_snd_addr); new_mpath->is_root = false; new_mpath->sdata = sdata; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); timer_setup(&new_mpath->timer, mesh_path_timer, 0); return new_mpath; } static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache, struct ieee80211_mesh_fast_tx *entry) { hlist_del_rcu(&entry->walk_list); rhashtable_remove_fast(&cache->rht, &entry->rhash, fast_tx_rht_params); kfree_rcu(entry, fast_tx.rcu_head); } struct ieee80211_mesh_fast_tx * mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_mesh_fast_tx_key *key) { struct ieee80211_mesh_fast_tx *entry; struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (!entry) return NULL; if (!(entry->mpath->flags & MESH_PATH_ACTIVE) || mpath_expired(entry->mpath)) { spin_lock_bh(&cache->walk_lock); entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); return NULL; } mesh_path_refresh(sdata, entry->mpath, NULL); if (entry->mppath) entry->mppath->exp_time = jiffies; entry->timestamp = jiffies; return entry; } void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct mesh_path *mpath) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct 
ieee80211_mesh_fast_tx *entry, *prev; struct ieee80211_mesh_fast_tx build = {}; struct ieee80211s_hdr *meshhdr; struct mesh_tx_cache *cache; struct ieee80211_key *key; struct mesh_path *mppath; struct sta_info *sta; u8 *qc; if (sdata->noack_map || !ieee80211_is_data_qos(hdr->frame_control)) return; build.fast_tx.hdr_len = ieee80211_hdrlen(hdr->frame_control); meshhdr = (struct ieee80211s_hdr *)(skb->data + build.fast_tx.hdr_len); build.hdrlen = ieee80211_get_mesh_hdrlen(meshhdr); cache = &sdata->u.mesh.tx_cache; if (atomic_read(&cache->rht.nelems) >= MESH_FAST_TX_CACHE_MAX_SIZE) return; sta = rcu_dereference(mpath->next_hop); if (!sta) return; build.key.type = MESH_FAST_TX_TYPE_LOCAL; if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { /* This is required to keep the mppath alive */ mppath = mpp_path_lookup(sdata, meshhdr->eaddr1); if (!mppath) return; build.mppath = mppath; if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr)) build.key.type = MESH_FAST_TX_TYPE_PROXIED; } else if (ieee80211_has_a4(hdr->frame_control)) { mppath = mpath; } else { return; } if (!ether_addr_equal(hdr->addr4, sdata->vif.addr)) build.key.type = MESH_FAST_TX_TYPE_FORWARDED; /* rate limit, in case fast xmit can't be enabled */ if (mppath->fast_tx_check == jiffies) return; mppath->fast_tx_check = jiffies; /* * Same use of the sta lock as in ieee80211_check_fast_xmit, in order * to protect against concurrent sta key updates. */ spin_lock_bh(&sta->lock); key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); if (!key) key = rcu_access_pointer(sdata->default_unicast_key); build.fast_tx.key = key; if (key) { bool gen_iv, iv_spc; gen_iv = key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; iv_spc = key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || (key->flags & KEY_FLAG_TAINTED)) goto unlock_sta; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (gen_iv) build.fast_tx.pn_offs = build.fast_tx.hdr_len; if (gen_iv || iv_spc) build.fast_tx.hdr_len += IEEE80211_CCMP_HDR_LEN; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (gen_iv) build.fast_tx.pn_offs = build.fast_tx.hdr_len; if (gen_iv || iv_spc) build.fast_tx.hdr_len += IEEE80211_GCMP_HDR_LEN; break; default: goto unlock_sta; } } memcpy(build.key.addr, mppath->dst, ETH_ALEN); build.timestamp = jiffies; build.fast_tx.band = info->band; build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3); build.fast_tx.sa_offs = offsetof(struct ieee80211_hdr, addr4); build.mpath = mpath; memcpy(build.hdr, meshhdr, build.hdrlen); memcpy(build.hdr + build.hdrlen, rfc1042_header, sizeof(rfc1042_header)); build.hdrlen += sizeof(rfc1042_header); memcpy(build.fast_tx.hdr, hdr, build.fast_tx.hdr_len); hdr = (struct ieee80211_hdr *)build.fast_tx.hdr; if (build.fast_tx.key) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); qc = ieee80211_get_qos_ctl(hdr); qc[1] |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8; entry = kmemdup(&build, sizeof(build), GFP_ATOMIC); if (!entry) goto unlock_sta; spin_lock(&cache->walk_lock); prev = rhashtable_lookup_get_insert_fast(&cache->rht, &entry->rhash, fast_tx_rht_params); if (IS_ERR(prev)) { kfree(entry); goto unlock_cache; } /* * replace any previous entry in the hash table, in case we're * replacing it with a different type (e.g. 
mpath -> mpp) */ if (unlikely(prev)) { rhashtable_replace_fast(&cache->rht, &prev->rhash, &entry->rhash, fast_tx_rht_params); hlist_del_rcu(&prev->walk_list); kfree_rcu(prev, fast_tx.rcu_head); } hlist_add_head(&entry->walk_list, &cache->walk_head); unlock_cache: spin_unlock(&cache->walk_lock); unlock_sta: spin_unlock_bh(&sta->lock); } void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata) { unsigned long timeout = msecs_to_jiffies(MESH_FAST_TX_CACHE_TIMEOUT); struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; if (atomic_read(&cache->rht.nelems) < MESH_FAST_TX_CACHE_THRESHOLD_SIZE) return; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (!time_is_after_jiffies(entry->timestamp + timeout)) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_mpath(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (entry->mpath == mpath) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (rcu_access_pointer(entry->mpath->next_hop) == sta) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx_key key = {}; struct ieee80211_mesh_fast_tx *entry; int i; ether_addr_copy(key.addr, addr); spin_lock_bh(&cache->walk_lock); for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) { key.type = i; entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); } spin_unlock_bh(&cache->walk_lock); } /** * mesh_path_add - allocate and add a new path to the mesh path table * @sdata: local subif * @dst: destination address of the path (ETH_ALEN length) * * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { struct mesh_table *tbl; struct mesh_path *mpath, *new_mpath; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ return ERR_PTR(-EOPNOTSUPP); if (is_multicast_ether_addr(dst)) return ERR_PTR(-EOPNOTSUPP); if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return ERR_PTR(-ENOSPC); new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) return ERR_PTR(-ENOMEM); tbl = &sdata->u.mesh.mesh_paths; spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead, &new_mpath->rhash, mesh_rht_params); if (!mpath) hlist_add_head(&new_mpath->walk_list, &tbl->walk_head); spin_unlock_bh(&tbl->walk_lock); if (mpath) { kfree(new_mpath); if (IS_ERR(mpath)) return mpath; new_mpath = mpath; } sdata->u.mesh.mesh_paths_generation++; return new_mpath; } int mpp_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst, const u8 *mpp) { struct mesh_table *tbl; struct mesh_path 
*new_mpath; int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ return -EOPNOTSUPP; if (is_multicast_ether_addr(dst)) return -EOPNOTSUPP; new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) return -ENOMEM; memcpy(new_mpath->mpp, mpp, ETH_ALEN); tbl = &sdata->u.mesh.mpp_paths; spin_lock_bh(&tbl->walk_lock); ret = rhashtable_lookup_insert_fast(&tbl->rhead, &new_mpath->rhash, mesh_rht_params); if (!ret) hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head); spin_unlock_bh(&tbl->walk_lock); if (ret) kfree(new_mpath); else mesh_fast_tx_flush_addr(sdata, dst); sdata->u.mesh.mpp_paths_generation++; return ret; } /** * mesh_plink_broken - deactivates paths and sends perr when a link breaks * * @sta: broken peer link * * This function must be called from the rate control algorithm if enough * delivery errors suggest that a peer link is no longer usable. */ void mesh_plink_broken(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; rcu_read_lock(); hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata, sdata->u.mesh.mshcfg.element_ttl, mpath->dst, mpath->sn, WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } rcu_read_unlock(); } static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; spin_lock_bh(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING | MESH_PATH_DELETED; mesh_gate_del(tbl, mpath); spin_unlock_bh(&mpath->state_lock); timer_shutdown_sync(&mpath->timer); atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); mesh_path_flush_pending(mpath); kfree_rcu(mpath, rcu); } static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { hlist_del_rcu(&mpath->walk_list); rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); if (tbl == &mpath->sdata->u.mesh.mpp_paths) mesh_fast_tx_flush_addr(mpath->sdata, mpath->dst); else mesh_fast_tx_flush_mpath(mpath); mesh_path_free_rcu(tbl, mpath); } /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches * * @sta: mesh peer to match * * RCU notes: this function is called when a mesh plink transitions from * PLINK_ESTAB to any other state, since PLINK_ESTAB state is the only one that * allows path creation. This will happen before the sta can be freed (because * sta_info_destroy() calls this) so any reader in a rcu read block will be * protected against the plink disappearing. 
*/ void mesh_path_flush_by_nexthop(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { struct mesh_table *tbl = &sdata->u.mesh.mpp_paths; struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (ether_addr_equal(mpath->mpp, proxy)) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } static void table_flush_by_iface(struct mesh_table *tbl) { struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } /** * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface * * @sdata: interface data to match * * This function deletes both mesh paths as well as mesh portal paths. */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { table_flush_by_iface(&sdata->u.mesh.mesh_paths); table_flush_by_iface(&sdata->u.mesh.mpp_paths); } /** * table_path_del - delete a path from the mesh or mpp table * * @tbl: mesh or mpp path table * @sdata: local subif * @addr: dst address (ETH_ALEN length) * * Returns: 0 if successful */ static int table_path_del(struct mesh_table *tbl, struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_path *mpath; spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); if (!mpath) { spin_unlock_bh(&tbl->walk_lock); return -ENXIO; } __mesh_path_del(tbl, mpath); spin_unlock_bh(&tbl->walk_lock); return 0; } /** * mesh_path_del - delete a mesh path from the table * * @sdata: local subif * @addr: dst address (ETH_ALEN length) * * Returns: 0 if successful */ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { int err; /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr); sdata->u.mesh.mesh_paths_generation++; return err; } /** * mesh_path_tx_pending - sends pending frames in a mesh path queue * * @mpath: mesh path to activate * * Locking: the state_lock of the mpath structure must NOT be held when calling * this function. */ void mesh_path_tx_pending(struct mesh_path *mpath) { if (mpath->flags & MESH_PATH_ACTIVE) ieee80211_add_pending_skbs(mpath->sdata->local, &mpath->frame_queue); } /** * mesh_path_send_to_gates - sends pending frames to all known mesh gates * * @mpath: mesh path whose queue will be emptied * * If there is only one gate, the frames are transferred from the failed mpath * queue to that gate's queue. If there are more than one gates, the frames * are copied from each gate to the next. After frames are copied, the * mpath queues are emptied onto the transmission queue. 
* * Returns: 0 on success, -EHOSTUNREACH */ int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; struct mesh_path *gate; bool copy = false; tbl = &sdata->u.mesh.mesh_paths; rcu_read_lock(); hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { if (gate->flags & MESH_PATH_ACTIVE) { mpath_dbg(sdata, "Forwarding to %pM\n", gate->dst); mesh_path_move_to_queue(gate, from_mpath, copy); from_mpath = gate; copy = true; } else { mpath_dbg(sdata, "Not forwarding to %pM (flags %#x)\n", gate->dst, gate->flags); } } hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { mpath_dbg(sdata, "Sending to %pM\n", gate->dst); mesh_path_tx_pending(gate); } rcu_read_unlock(); return (from_mpath == mpath) ? -EHOSTUNREACH : 0; } /** * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @sdata: network subif the frame was to be sent through * @skb: frame to discard * * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { ieee80211_free_txskb(&sdata->local->hw, skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; } /** * mesh_path_flush_pending - free the pending queue of a mesh path * * @mpath: mesh path whose queue has to be freed * * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_flush_pending(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq, *tmp; struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(mpath->sdata, skb); spin_lock_bh(&ifmsh->mesh_preq_queue_lock); list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { if (ether_addr_equal(mpath->dst, preq->dst)) { list_del(&preq->list); kfree(preq); --ifmsh->preq_queue_len; } } spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); } /** * mesh_path_fix_nexthop - force a specific next hop for a mesh path * * @mpath: the mesh path to modify * @next_hop: the next hop to force * * Locking: this function must be called holding mpath->state_lock */ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) { spin_lock_bh(&mpath->state_lock); mesh_path_assign_nexthop(mpath, next_hop); mpath->sn = 0xffff; mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); mesh_fast_tx_flush_mpath(mpath); spin_unlock_bh(&mpath->state_lock); ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg); /* init it at a low value - 0 start is tricky */ ewma_mesh_fail_avg_add(&next_hop->mesh->fail_avg, 1); mesh_path_tx_pending(mpath); } void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { mesh_table_init(&sdata->u.mesh.mesh_paths); mesh_table_init(&sdata->u.mesh.mpp_paths); mesh_fast_tx_init(sdata); } static void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { 
mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths); mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths); } void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { mesh_fast_tx_deinit(sdata); mesh_table_free(&sdata->u.mesh.mesh_paths); mesh_table_free(&sdata->u.mesh.mpp_paths); } |
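/*
 * Editor's note: a minimal, hypothetical caller sketch (not part of
 * mesh_pathtbl.c) of the lookup-or-create pattern the table above is built
 * for.  As documented in the kernel-doc, mesh_path_lookup() must run inside
 * an RCU read-side section, and mesh_path_add() returns an ERR_PTR() on
 * failure.  The function name and the -EAGAIN convention are illustrative
 * assumptions.
 */
static int example_resolve_mesh_path(struct ieee80211_sub_if_data *sdata,
				     const u8 *dst)
{
	struct mesh_path *mpath;
	int ret = 0;

	rcu_read_lock();
	mpath = mesh_path_lookup(sdata, dst);
	if (!mpath) {
		mpath = mesh_path_add(sdata, dst);
		if (IS_ERR(mpath)) {
			ret = PTR_ERR(mpath);
			goto out;
		}
	}
	if (!(mpath->flags & MESH_PATH_ACTIVE))
		ret = -EAGAIN;	/* caller would queue the frame and start path discovery */
out:
	rcu_read_unlock();
	return ret;
}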
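/*
 * Editor's note: a self-contained, hypothetical sketch of the fixed-point
 * unit conversion used by the BBR module that follows.  Bandwidth samples
 * are carried as packets per microsecond scaled by 2^24 (BW_SCALE below)
 * and gains as fractions scaled by 2^8 (BBR_SCALE below), so deriving a
 * pacing rate in bytes per second is a matter of multiplies and shifts.
 * Names are assumptions; this is not the module's actual helper, which
 * additionally applies a small pacing margin below the estimated bandwidth.
 */
static inline u64 example_bbr_bw_to_pacing_rate(u64 bw, u32 mss, u32 gain)
{
	u64 rate = bw;		/* pkts/uS << 24 */

	rate *= mss;		/* packets -> bytes, still scaled by 2^24 */
	rate *= gain;		/* apply gain, now also scaled by 2^8 */
	rate >>= 8;		/* drop the gain scaling (BBR_SCALE) */
	rate *= USEC_PER_SEC;	/* per microsecond -> per second */
	return rate >> 24;	/* drop the bandwidth scaling (BW_SCALE) */
}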
840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 | /* Bottleneck Bandwidth and RTT (BBR) congestion control * * BBR congestion control computes the sending rate based on the delivery * rate (throughput) estimated from ACKs. In a nutshell: * * On each ACK, update our model of the network path: * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) * min_rtt = windowed_min(rtt, 10 seconds) * pacing_rate = pacing_gain * bottleneck_bandwidth * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) * * The core algorithm does not react directly to packet losses or delays, * although BBR may adjust the size of next send per ACK when loss is * observed, or adjust the sending rate if it estimates there is a * traffic policer, in order to keep the drop rate reasonable. * * Here is a state transition diagram for BBR: * * | * V * +---> STARTUP ----+ * | | | * | V | * | DRAIN ----+ * | | | * | V | * +---> PROBE_BW ----+ * | ^ | | * | | | | * | +----+ | * | | * +---- PROBE_RTT <--+ * * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. * When it estimates the pipe is full, it enters DRAIN to drain the queue. * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. * A long-lived BBR flow spends the vast majority of its time remaining * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth * in a fair manner, with a small, bounded queue. *If* a flow has been * continuously sending for the entire min_rtt window, and hasn't seen an RTT * sample that matches or decreases its min_rtt estimate for 10 seconds, then * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; * otherwise we enter STARTUP to try to fill the pipe. 
* * BBR is described in detail in: * "BBR: Congestion-Based Congestion Control", * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. * * There is a public e-mail list for discussing BBR development and testing: * https://groups.google.com/forum/#!forum/bbr-dev * * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, * otherwise TCP stack falls back to an internal pacing using one high * resolution timer per TCP socket and may use more resources. */ #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/module.h> #include <net/tcp.h> #include <linux/inet_diag.h> #include <linux/inet.h> #include <linux/random.h> #include <linux/win_minmax.h> /* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. * Since the minimum window is >=4 packets, the lower bound isn't * an issue. The upper bound isn't an issue with existing technologies. */ #define BW_SCALE 24 #define BW_UNIT (1 << BW_SCALE) #define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ #define BBR_UNIT (1 << BBR_SCALE) /* BBR has the following modes for deciding how fast to send: */ enum bbr_mode { BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ BBR_DRAIN, /* drain any queue created during startup */ BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ }; /* BBR congestion control block */ struct bbr { u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ u32 min_rtt_stamp; /* timestamp of min_rtt_us */ u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ u32 rtt_cnt; /* count of packet-timed rounds elapsed */ u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ u64 cycle_mstamp; /* time of this cycle phase start */ u32 mode:3, /* current bbr_mode in state machine */ prev_ca_state:3, /* CA state on previous ACK */ packet_conservation:1, /* use packet conservation? */ round_start:1, /* start of packet-timed tx->ack round? */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ unused:13, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ u32 lt_last_delivered; /* LT intvl start: tp->delivered */ u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ u32 lt_last_lost; /* LT intvl start: tp->lost */ u32 pacing_gain:10, /* current gain for setting pacing rate */ cwnd_gain:10, /* current gain for setting cwnd */ full_bw_reached:1, /* reached full bw in Startup? */ full_bw_cnt:2, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ has_seen_rtt:1, /* have we seen an RTT sample yet? 
*/ unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ /* For tracking ACK aggregation: */ u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ u16 extra_acked[2]; /* max excess data ACKed in epoch */ u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ extra_acked_win_idx:1, /* current index in extra_acked array */ unused_c:6; }; #define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ /* Window length of bw filter (in rounds): */ static const int bbr_bw_rtts = CYCLE_LEN + 2; /* Window length of min_rtt filter (in sec): */ static const u32 bbr_min_rtt_win_sec = 10; /* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ static const u32 bbr_probe_rtt_mode_ms = 200; /* Skip TSO below the following bandwidth (bits/sec): */ static const int bbr_min_tso_rate = 1200000; /* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. * In order to help drive the network toward lower queues and low latency while * maintaining high utilization, the average pacing rate aims to be slightly * lower than the estimated bandwidth. This is an important aspect of the * design. */ static const int bbr_pacing_margin_percent = 1; /* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain * that will allow a smoothly increasing pacing rate that will double each RTT * and send the same number of packets per RTT that an un-paced, slow-starting * Reno or CUBIC flow would: */ static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; /* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain * the queue created in BBR_STARTUP in a single round: */ static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; /* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */ static const int bbr_cwnd_gain = BBR_UNIT * 2; /* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ static const int bbr_pacing_gain[] = { BBR_UNIT * 5 / 4, /* probe for more available bw */ BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ }; /* Randomize the starting gain cycling phase over N phases: */ static const u32 bbr_cycle_rand = 7; /* Try to keep at least this many packets in flight, if things go smoothly. For * smooth functioning, a sliding window protocol ACKing every other packet * needs at least 4 packets in flight: */ static const u32 bbr_cwnd_min_target = 4; /* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ /* If bw has increased significantly (1.25x), there may be more bw available: */ static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; /* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ static const u32 bbr_full_bw_cnt = 3; /* "long-term" ("LT") bandwidth estimator parameters... 
*/ /* The minimum number of rounds in an LT bw sampling interval: */ static const u32 bbr_lt_intvl_min_rtts = 4; /* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ static const u32 bbr_lt_loss_thresh = 50; /* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; /* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; /* Gain factor for adding extra_acked to target cwnd: */ static const int bbr_extra_acked_gain = BBR_UNIT; /* Window length of extra_acked window. */ static const u32 bbr_extra_acked_win_rtts = 5; /* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; /* Time period for clamping cwnd increment due to ack aggregation */ static const u32 bbr_extra_acked_max_us = 100 * 1000; static void bbr_check_probe_rtt_done(struct sock *sk); /* Do we estimate that STARTUP filled the pipe? */ static bool bbr_full_bw_reached(const struct sock *sk) { const struct bbr *bbr = inet_csk_ca(sk); return bbr->full_bw_reached; } /* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ static u32 bbr_max_bw(const struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); return minmax_get(&bbr->bw); } /* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ static u32 bbr_bw(const struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); } /* Return maximum extra acked in past k-2k round trips, * where k = bbr_extra_acked_win_rtts. */ static u16 bbr_extra_acked(const struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); return max(bbr->extra_acked[0], bbr->extra_acked[1]); } /* Return rate in bytes per second, optionally with a gain. * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. */ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) { unsigned int mss = tcp_sk(sk)->mss_cache; rate *= mss; rate *= gain; rate >>= BBR_SCALE; rate *= USEC_PER_SEC / 100 * (100 - bbr_pacing_margin_percent); return rate >> BW_SCALE; } /* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) { u64 rate = bw; rate = bbr_rate_bytes_per_sec(sk, rate, gain); rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); return rate; } /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u64 bw; u32 rtt_us; if (tp->srtt_us) { /* any RTT sample yet? */ rtt_us = max(tp->srtt_us >> 3, 1U); bbr->has_seen_rtt = 1; } else { /* no RTT sample yet */ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ } bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); WRITE_ONCE(sk->sk_pacing_rate, bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); } /* Pace using current bw estimate and a gain factor. 
*/ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain); if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) bbr_init_pacing_rate_from_rtt(sk); if (bbr_full_bw_reached(sk) || rate > READ_ONCE(sk->sk_pacing_rate)) WRITE_ONCE(sk->sk_pacing_rate, rate); } /* override sysctl_tcp_min_tso_segs */ __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) { return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; } static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 segs, bytes; /* Sort of tcp_tso_autosize() but ignoring * driver provided sk_gso_max_size. */ bytes = min_t(unsigned long, READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); return min(segs, 0x7FU); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ static void bbr_save_cwnd(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */ else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } __bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (event == CA_EVENT_TX_START && tp->app_limited) { bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; /* Avoid pointless buffer overflows: pace at est. bw if we don't * need more speed (we're restarting from idle and app-limited). */ if (bbr->mode == BBR_PROBE_BW) bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); else if (bbr->mode == BBR_PROBE_RTT) bbr_check_probe_rtt_done(sk); } } /* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * * bdp = ceil(bw * min_rtt * gain) * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT * measurements (e.g., delayed ACKs or other ACK compression effects). This * noise may cause BBR to under-estimate the rate. */ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) { struct bbr *bbr = inet_csk_ca(sk); u32 bdp; u64 w; /* If we've never had a valid RTT sample, cap cwnd at the initial * default. This should only happen when the connection is not using TCP * timestamps and has retransmitted all of the SYN/SYNACK/data packets * ACKed so far. In this case, an RTO can cut cwnd to 1, in which * case we need to slow-start up toward something safe: TCP_INIT_CWND. */ if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ w = (u64)bw * bbr->min_rtt_us; /* Apply a gain to the given value, remove the BW_SCALE shift, and * round the value up to avoid a negative feedback loop. 
*/ bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; return bdp; } /* To achieve full performance in high-speed paths, we budget enough cwnd to * fit full-sized skbs in-flight on both end hosts to fully utilize the path: * - one skb in sending host Qdisc, * - one skb in sending host TSO/GSO engine * - one skb being received by receiver host LRO/GRO/delayed-ACK engine * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, * which allows 2 outstanding 2-packet sequences, to try to keep pipe * full even with ACK-every-other-packet delayed ACKs. */ static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) { struct bbr *bbr = inet_csk_ca(sk); /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr_tso_segs_goal(sk); /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == 0) cwnd += 2; return cwnd; } /* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) { u32 inflight; inflight = bbr_bdp(sk, bw, gain); inflight = bbr_quantization_budget(sk, inflight); return inflight; } /* With pacing at lower layers, there's often less data "in the network" than * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), * we often have several skbs queued in the pacing layer with a pre-scheduled * earliest departure time (EDT). BBR adapts its pacing rate based on the * inflight level that it estimates has already been "baked in" by previous * departure time decisions. We calculate a rough estimate of the number of our * packets that might be in the network at the earliest departure time for the * next skb scheduled: * in_network_at_edt = inflight_at_edt - (EDT - now) * bw * If we're increasing inflight, then we want to know if the transmit of the * EDT skb will push inflight above the target, so inflight_at_edt includes * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, * then estimate if inflight will sink too low just before the EDT transmit. */ static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u64 now_ns, edt_ns, interval_us; u32 interval_delivered, inflight_at_edt; now_ns = tp->tcp_clock_cache; edt_ns = max(tp->tcp_wstamp_ns, now_ns); interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; inflight_at_edt = inflight_now; if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ if (interval_delivered >= inflight_at_edt) return 0; return inflight_at_edt - interval_delivered; } /* Find the cwnd increment based on estimate of ack aggregation */ static u32 bbr_ack_aggregation_cwnd(struct sock *sk) { u32 max_aggr_cwnd, aggr_cwnd = 0; if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) / BW_UNIT; aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) >> BBR_SCALE; aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); } return aggr_cwnd; } /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. 
* After that, we slow-start and send at most 2*P packets per P packets acked. * After recovery finishes, or upon undo, we restore the cwnd we had when * recovery started (capped by the target cwnd based on estimated BDP). * * TODO(ycheng/ncardwell): implement a rate-based approach. */ static bool bbr_set_cwnd_to_recover_or_restore( struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; u32 cwnd = tcp_snd_cwnd(tp); /* An ACK for P pkts should release at most 2*P packets. We do this * in two steps. First, here we deduct the number of lost packets. * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. */ if (rs->losses > 0) cwnd = max_t(s32, cwnd - rs->losses, 1); if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { /* Starting 1st round of Recovery, so do packet conservation. */ bbr->packet_conservation = 1; bbr->next_rtt_delivered = tp->delivered; /* start round now */ /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ cwnd = tcp_packets_in_flight(tp) + acked; } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { /* Exiting loss recovery; restore cwnd saved before recovery. */ cwnd = max(cwnd, bbr->prior_cwnd); bbr->packet_conservation = 0; } bbr->prev_ca_state = state; if (bbr->packet_conservation) { *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); return true; /* yes, using packet conservation */ } *new_cwnd = cwnd; return false; } /* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss * has drawn us down below target), or snap down to target if we're above it. */ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, u32 acked, u32 bw, int gain) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0; if (!acked) goto done; /* no packet fully ACKed; just apply caps */ if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; target_cwnd = bbr_bdp(sk, bw, gain); /* Increment the cwnd to account for excess ACKed data that seems * due to aggregation (of data and/or ACKs) visible in the ACK stream. */ target_cwnd += bbr_ack_aggregation_cwnd(sk); target_cwnd = bbr_quantization_budget(sk, target_cwnd); /* If we're below target cwnd, slow start cwnd toward target cwnd. */ if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ cwnd = min(cwnd + acked, target_cwnd); else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) cwnd = cwnd + acked; cwnd = max(cwnd, bbr_cwnd_min_target); done: tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */ if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target)); } /* End cycle phase if it's time and/or we hit the phase's in-flight target. */ static bool bbr_is_next_cycle_phase(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bool is_full_length = tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) > bbr->min_rtt_us; u32 inflight, bw; /* The pacing_gain of 1.0 paces at the estimated bw to try to fully * use the pipe without increasing the queue. 
*/ if (bbr->pacing_gain == BBR_UNIT) return is_full_length; /* just use wall clock time */ inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); bw = bbr_max_bw(sk); /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is * small (e.g. on a LAN). We do not persist if packets are lost, since * a path with small buffers may not hold that much. */ if (bbr->pacing_gain > BBR_UNIT) return is_full_length && (rs->losses || /* perhaps pacing_gain*BDP won't fit */ inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); /* A pacing_gain < 1.0 tries to drain extra queue we added if bw * probing didn't find more bw. If inflight falls to match BDP then we * estimate queue is drained; persisting would underutilize the pipe. */ return is_full_length || inflight <= bbr_inflight(sk, bw, BBR_UNIT); } static void bbr_advance_cycle_phase(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); bbr->cycle_mstamp = tp->delivered_mstamp; } /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ static void bbr_update_cycle_phase(struct sock *sk, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) bbr_advance_cycle_phase(sk); } static void bbr_reset_startup_mode(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_STARTUP; } static void bbr_reset_probe_bw_mode(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); bbr->mode = BBR_PROBE_BW; bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand); bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ } static void bbr_reset_mode(struct sock *sk) { if (!bbr_full_bw_reached(sk)) bbr_reset_startup_mode(sk); else bbr_reset_probe_bw_mode(sk); } /* Start a new long-term sampling interval. */ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC); bbr->lt_last_delivered = tp->delivered; bbr->lt_last_lost = tp->lost; bbr->lt_rtt_cnt = 0; } /* Completely reset long-term bandwidth sampling. */ static void bbr_reset_lt_bw_sampling(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); bbr->lt_bw = 0; bbr->lt_use_bw = 0; bbr->lt_is_sampling = false; bbr_reset_lt_bw_sampling_interval(sk); } /* Long-term bw sampling interval is done. Estimate whether we're policed. */ static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) { struct bbr *bbr = inet_csk_ca(sk); u32 diff; if (bbr->lt_bw) { /* do we have bw from a previous interval? */ /* Is new bw close to the lt_bw from the previous interval? */ diff = abs(bw - bbr->lt_bw); if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= bbr_lt_bw_diff)) { /* All criteria are met; estimate we're policed. */ bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ bbr->lt_use_bw = 1; bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ bbr->lt_rtt_cnt = 0; return; } } bbr->lt_bw = bw; bbr_reset_lt_bw_sampling_interval(sk); } /* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and * explicitly models their policed rate, to reduce unnecessary losses. 
We * estimate that we're policed if we see 2 consecutive sampling intervals with * consistent throughput and high packet loss. If we think we're being policed, * set lt_bw to the "long-term" average delivery rate from those 2 intervals. */ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u32 lost, delivered; u64 bw; u32 t; if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ if (bbr->mode == BBR_PROBE_BW && bbr->round_start && ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ } return; } /* Wait for the first loss before sampling, to let the policer exhaust * its tokens and estimate the steady-state rate allowed by the policer. * Starting samples earlier includes bursts that over-estimate the bw. */ if (!bbr->lt_is_sampling) { if (!rs->losses) return; bbr_reset_lt_bw_sampling_interval(sk); bbr->lt_is_sampling = true; } /* To avoid underestimates, reset sampling if we run out of data. */ if (rs->is_app_limited) { bbr_reset_lt_bw_sampling(sk); return; } if (bbr->round_start) bbr->lt_rtt_cnt++; /* count round trips in this interval */ if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) return; /* sampling interval needs to be longer */ if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { bbr_reset_lt_bw_sampling(sk); /* interval is too long */ return; } /* End sampling interval when a packet is lost, so we estimate the * policer tokens were exhausted. Stopping the sampling before the * tokens are exhausted under-estimates the policed rate. */ if (!rs->losses) return; /* Calculate packets lost and delivered in sampling interval. */ lost = tp->lost - bbr->lt_last_lost; delivered = tp->delivered - bbr->lt_last_delivered; /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) return; /* Find average delivery rate in this sampling interval. */ t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp; if ((s32)t < 1) return; /* interval is less than one ms, so wait */ /* Check if can multiply without overflow */ if (t >= ~0U / USEC_PER_MSEC) { bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ return; } t *= USEC_PER_MSEC; bw = (u64)delivered * BW_UNIT; do_div(bw, t); bbr_lt_bw_interval_done(sk, bw); } /* Estimate the bandwidth based on how fast packets are delivered */ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); u64 bw; bbr->round_start = 0; if (rs->delivered < 0 || rs->interval_us <= 0) return; /* Not a valid observation */ /* See if we've reached the next RTT */ if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { bbr->next_rtt_delivered = tp->delivered; bbr->rtt_cnt++; bbr->round_start = 1; bbr->packet_conservation = 0; } bbr_lt_bw_sampling(sk, rs); /* Divide delivered by the interval to find a (lower bound) bottleneck * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); /* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than * the available network rate. Such a sample could drag down estimated * bw, causing needless slow-down. 
Thus, to continue to send at the * last measured network rate, we filter out app-limited samples unless * they describe the path bw at least as well as our bw model. * * So the goal during app-limited phase is to proceed with the best * network rate no matter how long. We automatically leave this * phase when app writes faster than the network can deliver :) */ if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { /* Incorporate new sample into our max bw filter. */ minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); } } /* Estimates the windowed max degree of ack aggregation. * This is used to provision extra in-flight data to keep sending during * inter-ACK silences. * * Degree of ack aggregation is estimated as extra data acked beyond expected. * * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" * cwnd += max_extra_acked * * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). * Max filter is an approximate sliding window of 5-10 (packet timed) round * trips. */ static void bbr_update_ack_aggregation(struct sock *sk, const struct rate_sample *rs) { u32 epoch_us, expected_acked, extra_acked; struct bbr *bbr = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || rs->delivered < 0 || rs->interval_us <= 0) return; if (bbr->round_start) { bbr->extra_acked_win_rtts = min(0x1F, bbr->extra_acked_win_rtts + 1); if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? 0 : 1; bbr->extra_acked[bbr->extra_acked_win_idx] = 0; } } /* Compute how many packets we expected to be delivered over epoch. */ epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, bbr->ack_epoch_mstamp); expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; /* Reset the aggregation epoch if ACK rate is below expected rate or * significantly large no. of ack received since epoch (potentially * quite old epoch). */ if (bbr->ack_epoch_acked <= expected_acked || (bbr->ack_epoch_acked + rs->acked_sacked >= bbr_ack_epoch_acked_reset_thresh)) { bbr->ack_epoch_acked = 0; bbr->ack_epoch_mstamp = tp->delivered_mstamp; expected_acked = 0; } /* Compute excess data delivered, beyond what was expected. */ bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, bbr->ack_epoch_acked + rs->acked_sacked); extra_acked = bbr->ack_epoch_acked - expected_acked; extra_acked = min(extra_acked, tcp_snd_cwnd(tp)); if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; } /* Estimate when the pipe is full, using the change in delivery rate: BBR * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the * higher rwin, 3: we get higher delivery rate samples. Or transient * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar * design goal, but uses delay and inter-ACK spacing instead of bandwidth. 
*/ static void bbr_check_full_bw_reached(struct sock *sk, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); u32 bw_thresh; if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) return; bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; if (bbr_max_bw(sk) >= bw_thresh) { bbr->full_bw = bbr_max_bw(sk); bbr->full_bw_cnt = 0; return; } ++bbr->full_bw_cnt; bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt; } /* If pipe is probably full, drain the queue and then enter steady-state. */ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { bbr->mode = BBR_DRAIN; /* drain queue we created */ tcp_sk(sk)->snd_ssthresh = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } static void bbr_check_probe_rtt_done(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); if (!(bbr->probe_rtt_done_stamp && after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) return; bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd)); bbr_reset_mode(sk); } /* The goal of PROBE_RTT mode is to have BBR flows cooperatively and * periodically drain the bottleneck queue, to converge to measure the true * min_rtt (unloaded propagation delay). This allows the flows to keep queues * small (reducing queuing delay and packet loss) and achieve fairness among * BBR flows. * * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and * re-enter the previous mode. BBR uses 200ms to approximately bound the * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). * * Note that flows need only pay 2% if they are busy sending over the last 10 * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have * natural silences or low-rate periods within 10 seconds where the rate is low * enough for long enough to drain its queue in the bottleneck. We pick up * these min RTT measurements opportunistically with our min_rtt filter. :-) */ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bool filter_expired; /* Track min RTT seen in the min_rtt_win_sec filter window: */ filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; } if (bbr_probe_rtt_mode_ms > 0 && filter_expired && !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ bbr_save_cwnd(sk); /* note cwnd so we can restore it */ bbr->probe_rtt_done_stamp = 0; } if (bbr->mode == BBR_PROBE_RTT) { /* Ignore low rate samples during this mode. */ tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ? 
: 1; /* Maintain min packets in flight for max(200 ms, 1 round). */ if (!bbr->probe_rtt_done_stamp && tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { bbr->probe_rtt_done_stamp = tcp_jiffies32 + msecs_to_jiffies(bbr_probe_rtt_mode_ms); bbr->probe_rtt_round_done = 0; bbr->next_rtt_delivered = tp->delivered; } else if (bbr->probe_rtt_done_stamp) { if (bbr->round_start) bbr->probe_rtt_round_done = 1; if (bbr->probe_rtt_round_done) bbr_check_probe_rtt_done(sk); } } /* Restart after idle ends only once we process a new S/ACK for data */ if (rs->delivered > 0) bbr->idle_restart = 0; } static void bbr_update_gains(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); switch (bbr->mode) { case BBR_STARTUP: bbr->pacing_gain = bbr_high_gain; bbr->cwnd_gain = bbr_high_gain; break; case BBR_DRAIN: bbr->pacing_gain = bbr_drain_gain; /* slow, to drain */ bbr->cwnd_gain = bbr_high_gain; /* keep cwnd */ break; case BBR_PROBE_BW: bbr->pacing_gain = (bbr->lt_use_bw ? BBR_UNIT : bbr_pacing_gain[bbr->cycle_idx]); bbr->cwnd_gain = bbr_cwnd_gain; break; case BBR_PROBE_RTT: bbr->pacing_gain = BBR_UNIT; bbr->cwnd_gain = BBR_UNIT; break; default: WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); break; } } static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); bbr_update_ack_aggregation(sk, rs); bbr_update_cycle_phase(sk, rs); bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); bbr_update_min_rtt(sk, rs); bbr_update_gains(sk); } __bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) { struct bbr *bbr = inet_csk_ca(sk); u32 bw; bbr_update_model(sk, rs); bw = bbr_bw(sk); bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } __bpf_kfunc static void bbr_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; bbr->packet_conservation = 0; bbr->probe_rtt_done_stamp = 0; bbr->probe_rtt_round_done = 0; bbr->min_rtt_us = tcp_min_rtt(tp); bbr->min_rtt_stamp = tcp_jiffies32; minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ bbr->has_seen_rtt = 0; bbr_init_pacing_rate_from_rtt(sk); bbr->round_start = 0; bbr->idle_restart = 0; bbr->full_bw_reached = 0; bbr->full_bw = 0; bbr->full_bw_cnt = 0; bbr->cycle_mstamp = 0; bbr->cycle_idx = 0; bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; bbr->extra_acked_win_rtts = 0; bbr->extra_acked_win_idx = 0; bbr->extra_acked[0] = 0; bbr->extra_acked[1] = 0; cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } __bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk) { /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ return 3; } /* In theory BBR does not need to undo the cwnd since it does not * always reduce cwnd on losses (see bbr_main()). Keep it for now. */ __bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk) { struct bbr *bbr = inet_csk_ca(sk); bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ bbr->full_bw_cnt = 0; bbr_reset_lt_bw_sampling(sk); return tcp_snd_cwnd(tcp_sk(sk)); } /* Entering loss recovery, so save cwnd for when we exit or undo recovery. 
 */
__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
{
	bbr_save_cwnd(sk);
	return tcp_sk(sk)->snd_ssthresh;
}

static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
			   union tcp_cc_info *info)
{
	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct bbr *bbr = inet_csk_ca(sk);
		u64 bw = bbr_bw(sk);

		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
		memset(&info->bbr, 0, sizeof(info->bbr));
		info->bbr.bbr_bw_lo		= (u32)bw;
		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
		*attr = INET_DIAG_BBRINFO;
		return sizeof(info->bbr);
	}
	return 0;
}

__bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
{
	struct bbr *bbr = inet_csk_ca(sk);

	if (new_state == TCP_CA_Loss) {
		struct rate_sample rs = { .losses = 1 };

		bbr->prev_ca_state = TCP_CA_Loss;
		bbr->full_bw = 0;
		bbr->round_start = 1;	/* treat RTO like end of a round */
		bbr_lt_bw_sampling(sk, &rs);
	}
}

static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
	.flags		= TCP_CONG_NON_RESTRICTED,
	.name		= "bbr",
	.owner		= THIS_MODULE,
	.init		= bbr_init,
	.cong_control	= bbr_main,
	.sndbuf_expand	= bbr_sndbuf_expand,
	.undo_cwnd	= bbr_undo_cwnd,
	.cwnd_event	= bbr_cwnd_event,
	.ssthresh	= bbr_ssthresh,
	.min_tso_segs	= bbr_min_tso_segs,
	.get_info	= bbr_get_info,
	.set_state	= bbr_set_state,
};

BTF_KFUNCS_START(tcp_bbr_check_kfunc_ids)
BTF_ID_FLAGS(func, bbr_init)
BTF_ID_FLAGS(func, bbr_main)
BTF_ID_FLAGS(func, bbr_sndbuf_expand)
BTF_ID_FLAGS(func, bbr_undo_cwnd)
BTF_ID_FLAGS(func, bbr_cwnd_event)
BTF_ID_FLAGS(func, bbr_ssthresh)
BTF_ID_FLAGS(func, bbr_min_tso_segs)
BTF_ID_FLAGS(func, bbr_set_state)
BTF_KFUNCS_END(tcp_bbr_check_kfunc_ids)

static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &tcp_bbr_check_kfunc_ids,
};

static int __init bbr_register(void)
{
	int ret;

	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);

	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set);
	if (ret < 0)
		return ret;

	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}

static void __exit bbr_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}

module_init(bbr_register);
module_exit(bbr_unregister);

MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
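The header comment at the top of tcp_bbr.c compresses the whole model into four lines: a windowed-max filter for bottleneck bandwidth, a windowed-min filter for min_rtt, and a pacing rate and cwnd derived from them with per-mode gains. The user-space sketch below only illustrates that arithmetic; it is not kernel code. The bbr_model type, the demo_* values and the plain max/min filters are invented here for illustration, and the fixed-point conventions (Q8 gains like BBR_UNIT, bandwidth in pkts/usec << 24 like BW_UNIT) merely mirror the constants defined in the module.

#include <stdint.h>
#include <stdio.h>

/* Invented for this sketch: a minimal stand-in for struct bbr's model state. */
struct bbr_model {
	uint64_t max_bw;	/* max delivery rate seen, pkts/usec << 24 */
	uint64_t min_rtt_us;	/* min round-trip time seen, in usec */
	uint32_t pacing_gain;	/* Q8 gain applied to bw for pacing */
	uint32_t cwnd_gain;	/* Q8 gain applied to the BDP for cwnd */
};

/* Feed one delivery-rate/RTT sample into the model. The kernel uses windowed
 * max/min filters (10 round trips / 10 seconds); plain max/min keeps the
 * sketch short.
 */
static void bbr_model_update(struct bbr_model *m, uint64_t delivered_pkts,
			     uint64_t interval_us, uint64_t rtt_us)
{
	uint64_t bw = (delivered_pkts << 24) / interval_us;

	if (bw > m->max_bw)
		m->max_bw = bw;		/* bottleneck_bandwidth = windowed_max(...) */
	if (rtt_us < m->min_rtt_us)
		m->min_rtt_us = rtt_us;	/* min_rtt = windowed_min(...) */
}

/* cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) */
static uint64_t bbr_model_cwnd(const struct bbr_model *m)
{
	uint64_t bdp = (m->max_bw * m->min_rtt_us) >> 24;	/* packets */
	uint64_t cwnd = (bdp * m->cwnd_gain) >> 8;

	return cwnd > 4 ? cwnd : 4;
}

int main(void)
{
	struct bbr_model m = {
		.min_rtt_us = UINT64_MAX,
		.pacing_gain = 739,	/* ~2.89 * 256, like the startup high gain */
		.cwnd_gain = 512,	/* 2.0 * 256 */
	};
	uint64_t pacing;

	/* One sample: 100 packets delivered in 10 ms, with a 10 ms RTT. */
	bbr_model_update(&m, 100, 10000, 10000);

	/* pacing_rate = pacing_gain * bottleneck_bandwidth */
	pacing = (m.max_bw * m.pacing_gain) >> 8;	/* pkts/usec << 24 */
	printf("pacing rate: %llu pkts/sec\n",
	       (unsigned long long)(pacing * 1000000 >> 24));
	printf("cwnd: %llu packets\n", (unsigned long long)bbr_model_cwnd(&m));
	return 0;
}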
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PROTO_MEMORY_H
#define _PROTO_MEMORY_H

#include <net/sock.h>
#include <net/hotdata.h>

/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))

static inline bool sk_has_memory_pressure(const struct sock *sk)
{
	return sk->sk_prot->memory_pressure != NULL;
}

static inline bool proto_memory_pressure(const struct proto *prot)
{
	if (!prot->memory_pressure)
		return false;
	return !!READ_ONCE(*prot->memory_pressure);
}

static inline bool sk_under_global_memory_pressure(const struct sock *sk)
{
	return proto_memory_pressure(sk->sk_prot);
}

static inline bool sk_under_memory_pressure(const struct sock *sk)
{
	if (!sk->sk_prot->memory_pressure)
		return false;

	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    mem_cgroup_under_socket_pressure(sk->sk_memcg))
		return true;

	return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}

static inline long proto_memory_allocated(const struct proto *prot)
{
	return max(0L, atomic_long_read(prot->memory_allocated));
}

static inline long sk_memory_allocated(const struct sock *sk)
{
	return proto_memory_allocated(sk->sk_prot);
}

static inline void proto_memory_pcpu_drain(struct proto *proto)
{
	int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);

	if (val)
		atomic_long_add(val, proto->memory_allocated);
}

static inline void sk_memory_allocated_add(const struct sock *sk, int val)
{
	struct proto *proto = sk->sk_prot;

	val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);

	if (unlikely(val >= READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
		proto_memory_pcpu_drain(proto);
}

static inline void sk_memory_allocated_sub(const struct sock *sk, int val)
{
	struct proto *proto = sk->sk_prot;

	val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);

	if (unlikely(val <= -READ_ONCE(net_hotdata.sysctl_mem_pcpu_rsv)))
		proto_memory_pcpu_drain(proto);
}

#endif /* _PROTO_MEMORY_H */
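proto_memory.h above batches socket memory accounting in a per-CPU counter (*proto->per_cpu_fw_alloc) and only folds it into the shared atomic total once the local imbalance reaches the per-CPU reserve. The sketch below shows just that batching pattern under simplifying assumptions: the demo_* names are invented, _Thread_local stands in for real per-CPU data, and the reserve is an arbitrary constant rather than SK_MEMORY_PCPU_RESERVE.

#include <stdatomic.h>
#include <stdio.h>

#define DEMO_PCPU_RESERVE 256			/* stand-in reserve, in pages */

static atomic_long demo_memory_allocated;	/* plays proto->memory_allocated */
static _Thread_local long demo_fw_alloc;	/* plays *proto->per_cpu_fw_alloc */

/* Fold the local batch into the shared total, like proto_memory_pcpu_drain(). */
static void demo_pcpu_drain(void)
{
	long val = demo_fw_alloc;

	demo_fw_alloc = 0;
	if (val)
		atomic_fetch_add(&demo_memory_allocated, val);
}

static void demo_memory_allocated_add(int pages)
{
	demo_fw_alloc += pages;
	if (demo_fw_alloc >= DEMO_PCPU_RESERVE)		/* batch got big enough */
		demo_pcpu_drain();
}

static void demo_memory_allocated_sub(int pages)
{
	demo_fw_alloc -= pages;
	if (demo_fw_alloc <= -DEMO_PCPU_RESERVE)	/* freed a big batch */
		demo_pcpu_drain();
}

int main(void)
{
	int i;

	/* 300 one-page charges trigger exactly one update of the shared total. */
	for (i = 0; i < 300; i++)
		demo_memory_allocated_add(1);
	printf("global=%ld local=%ld\n",
	       atomic_load(&demo_memory_allocated), demo_fw_alloc);

	demo_memory_allocated_sub(300);		/* local hits -256 and drains again */
	printf("global=%ld local=%ld\n",
	       atomic_load(&demo_memory_allocated), demo_fw_alloc);
	return 0;
}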
#ifndef __NET_SCHED_CODEL_IMPL_H
#define __NET_SCHED_CODEL_IMPL_H

/*
 * Codel - The Controlled-Delay Active Queue Management algorithm
 *
 *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
 *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
 *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
 *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * Alternatively, provided that this notice is retained in full, this
 * software may be distributed under the terms of the GNU General
 * Public License ("GPL") version 2, in which case the provisions of the
 * GPL apply INSTEAD OF those given above.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
* */ /* Controlling Queue Delay (CoDel) algorithm * ========================================= * Source : Kathleen Nichols and Van Jacobson * http://queue.acm.org/detail.cfm?id=2209336 * * Implemented on linux by Dave Taht and Eric Dumazet */ #include <net/inet_ecn.h> static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ce_threshold_mask = 0; params->ce_threshold_selector = 0; params->ecn = false; } static void codel_vars_init(struct codel_vars *vars) { memset(vars, 0, sizeof(*vars)); } static void codel_stats_init(struct codel_stats *stats) { stats->maxpacket = 0; } /* * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); val >>= 2; /* avoid overflow in following multiply */ val = (val * invsqrt) >> (32 - 2 + 1); vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } /* * CoDel control_law is t + interval/sqrt(count) * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid * both sqrt() and divide operation. */ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { return t + reciprocal_scale(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } static bool codel_should_drop(const struct sk_buff *skb, void *ctx, struct codel_vars *vars, struct codel_params *params, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, u32 *backlog, codel_time_t now) { bool ok_to_drop; u32 skb_len; if (!skb) { vars->first_above_time = 0; return false; } skb_len = skb_len_func(skb); vars->ldelay = now - skb_time_func(skb); if (unlikely(skb_len > stats->maxpacket)) stats->maxpacket = skb_len; if (codel_time_before(vars->ldelay, params->target) || *backlog <= params->mtu) { /* went below - stay below for at least interval */ vars->first_above_time = 0; return false; } ok_to_drop = false; if (vars->first_above_time == 0) { /* just went above from below. If we stay above * for at least interval we'll say it's ok to drop */ vars->first_above_time = now + params->interval; } else if (codel_time_after(now, vars->first_above_time)) { ok_to_drop = true; } return ok_to_drop; } static struct sk_buff *codel_dequeue(void *ctx, u32 *backlog, struct codel_params *params, struct codel_vars *vars, struct codel_stats *stats, codel_skb_len_t skb_len_func, codel_skb_time_t skb_time_func, codel_skb_drop_t drop_func, codel_skb_dequeue_t dequeue_func) { struct sk_buff *skb = dequeue_func(vars, ctx); codel_time_t now; bool drop; if (!skb) { vars->dropping = false; return skb; } now = codel_get_time(); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); if (vars->dropping) { if (!drop) { /* sojourn time below target - leave dropping state */ vars->dropping = false; } else if (codel_time_after_eq(now, vars->drop_next)) { /* It's time for the next drop. Drop the current * packet and dequeue the next. The dequeue might * take us out of dropping state. * If not, schedule the next drop. 
* A large backlog might result in drop rates so high * that the next drop should happen now, * hence the while loop. */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { vars->count++; /* dont care of possible wrap * since there is no more divide */ codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); goto end; } stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); if (!codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now)) { /* leave dropping state */ vars->dropping = false; } else { /* and schedule the next drop */ vars->drop_next = codel_control_law(vars->drop_next, params->interval, vars->rec_inv_sqrt); } } } } else if (drop) { u32 delta; if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { stats->drop_len += skb_len_func(skb); drop_func(skb, ctx); stats->drop_count++; skb = dequeue_func(vars, ctx); drop = codel_should_drop(skb, ctx, vars, params, stats, skb_len_func, skb_time_func, backlog, now); } vars->dropping = true; /* if min went above target close to when we last went below it * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ delta = vars->count - vars->lastcount; if (delta > 1 && codel_time_before(now - vars->drop_next, 16 * params->interval)) { vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. */ codel_Newton_step(vars); } else { vars->count = 1; vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } vars->lastcount = vars->count; vars->drop_next = codel_control_law(now, params->interval, vars->rec_inv_sqrt); } end: if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { bool set_ce = true; if (params->ce_threshold_mask) { int dsfield = skb_get_dsfield(skb); set_ce = (dsfield >= 0 && (((u8)dsfield & params->ce_threshold_mask) == params->ce_threshold_selector)); } if (set_ce && INET_ECN_set_ce(skb)) stats->ce_mark++; } return skb; } #endif |
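codel_control_law() above schedules the next drop at t + interval/sqrt(count), and codel_Newton_step() maintains rec_inv_sqrt, a Q0.32 approximation of 1/sqrt(count), so the hot path needs neither sqrt() nor a divide. The floating-point sketch below only illustrates that recurrence; the demo_* names are invented, a real implementation would keep the fixed-point form used above, and the program needs -lm for sqrt().

#include <stdio.h>
#include <math.h>

/* One Newton-Raphson iteration for 1/sqrt(count):
 *   new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
 */
static double demo_newton_step(double inv_sqrt, unsigned int count)
{
	return inv_sqrt / 2.0 * (3.0 - (double)count * inv_sqrt * inv_sqrt);
}

/* CoDel control law: next drop at t + interval / sqrt(count). */
static double demo_control_law(double t, double interval, double inv_sqrt)
{
	return t + interval * inv_sqrt;
}

int main(void)
{
	double inv_sqrt = 1.0;			/* count == 1  =>  1/sqrt(1) */
	double t = 0.0, interval = 100.0;	/* e.g. 100 ms */
	unsigned int count;

	/* As count grows, drops are scheduled closer and closer together
	 * (modulo the early approximation error, which later steps correct).
	 */
	for (count = 1; count <= 5; count++) {
		inv_sqrt = demo_newton_step(inv_sqrt, count);
		t = demo_control_law(t, interval, inv_sqrt);
		printf("count=%u  1/sqrt~=%.3f (exact %.3f)  drop_next=%.1f ms\n",
		       count, inv_sqrt, 1.0 / sqrt((double)count), t);
	}
	return 0;
}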
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) International Business Machines Corp., 2006
 *
 * Author: Artem Bityutskiy (Битюцкий Артём)
 */

#include "ubi.h"
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/fault-inject.h>

#ifdef CONFIG_MTD_UBI_FAULT_INJECTION
static DECLARE_FAULT_ATTR(fault_eccerr_attr);
static DECLARE_FAULT_ATTR(fault_bitflips_attr);
static DECLARE_FAULT_ATTR(fault_read_failure_attr);
static DECLARE_FAULT_ATTR(fault_write_failure_attr);
static DECLARE_FAULT_ATTR(fault_erase_failure_attr);
static DECLARE_FAULT_ATTR(fault_power_cut_attr);
static DECLARE_FAULT_ATTR(fault_io_ff_attr);
static DECLARE_FAULT_ATTR(fault_io_ff_bitflips_attr);
static DECLARE_FAULT_ATTR(fault_bad_hdr_attr);
static DECLARE_FAULT_ATTR(fault_bad_hdr_ebadmsg_attr);

#define
FAIL_ACTION(name, fault_attr) \ bool should_fail_##name(void) \ { \ return should_fail(&fault_attr, 1); \ } FAIL_ACTION(eccerr, fault_eccerr_attr) FAIL_ACTION(bitflips, fault_bitflips_attr) FAIL_ACTION(read_failure, fault_read_failure_attr) FAIL_ACTION(write_failure, fault_write_failure_attr) FAIL_ACTION(erase_failure, fault_erase_failure_attr) FAIL_ACTION(power_cut, fault_power_cut_attr) FAIL_ACTION(io_ff, fault_io_ff_attr) FAIL_ACTION(io_ff_bitflips, fault_io_ff_bitflips_attr) FAIL_ACTION(bad_hdr, fault_bad_hdr_attr) FAIL_ACTION(bad_hdr_ebadmsg, fault_bad_hdr_ebadmsg_attr) #endif /** * ubi_dump_flash - dump a region of flash. * @ubi: UBI device description object * @pnum: the physical eraseblock number to dump * @offset: the starting offset within the physical eraseblock to dump * @len: the length of the region to dump */ void ubi_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) { int err; size_t read; void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; buf = vmalloc(len); if (!buf) return; err = mtd_read(ubi->mtd, addr, len, &read, buf); if (err && err != -EUCLEAN) { ubi_err(ubi, "err %d while reading %d bytes from PEB %d:%d, read %zd bytes", err, len, pnum, offset, read); goto out; } ubi_msg(ubi, "dumping %d bytes of data from PEB %d, offset %d", len, pnum, offset); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); out: vfree(buf); return; } /** * ubi_dump_ec_hdr - dump an erase counter header. * @ec_hdr: the erase counter header to dump */ void ubi_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { pr_err("Erase counter header dump:\n"); pr_err("\tmagic %#08x\n", be32_to_cpu(ec_hdr->magic)); pr_err("\tversion %d\n", (int)ec_hdr->version); pr_err("\tec %llu\n", (long long)be64_to_cpu(ec_hdr->ec)); pr_err("\tvid_hdr_offset %d\n", be32_to_cpu(ec_hdr->vid_hdr_offset)); pr_err("\tdata_offset %d\n", be32_to_cpu(ec_hdr->data_offset)); pr_err("\timage_seq %d\n", be32_to_cpu(ec_hdr->image_seq)); pr_err("\thdr_crc %#08x\n", be32_to_cpu(ec_hdr->hdr_crc)); pr_err("erase counter header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ec_hdr, UBI_EC_HDR_SIZE, 1); } /** * ubi_dump_vid_hdr - dump a volume identifier header. * @vid_hdr: the volume identifier header to dump */ void ubi_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { pr_err("Volume identifier header dump:\n"); pr_err("\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); pr_err("\tversion %d\n", (int)vid_hdr->version); pr_err("\tvol_type %d\n", (int)vid_hdr->vol_type); pr_err("\tcopy_flag %d\n", (int)vid_hdr->copy_flag); pr_err("\tcompat %d\n", (int)vid_hdr->compat); pr_err("\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); pr_err("\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); pr_err("\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); pr_err("\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); pr_err("\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); pr_err("\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); pr_err("\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); pr_err("Volume identifier header hexdump:\n"); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, vid_hdr, UBI_VID_HDR_SIZE, 1); } /** * ubi_dump_vol_info - dump volume information. 
* @vol: UBI volume description object */ void ubi_dump_vol_info(const struct ubi_volume *vol) { pr_err("Volume information dump:\n"); pr_err("\tvol_id %d\n", vol->vol_id); pr_err("\treserved_pebs %d\n", vol->reserved_pebs); pr_err("\talignment %d\n", vol->alignment); pr_err("\tdata_pad %d\n", vol->data_pad); pr_err("\tvol_type %d\n", vol->vol_type); pr_err("\tname_len %d\n", vol->name_len); pr_err("\tusable_leb_size %d\n", vol->usable_leb_size); pr_err("\tused_ebs %d\n", vol->used_ebs); pr_err("\tused_bytes %lld\n", vol->used_bytes); pr_err("\tlast_eb_bytes %d\n", vol->last_eb_bytes); pr_err("\tcorrupted %d\n", vol->corrupted); pr_err("\tupd_marker %d\n", vol->upd_marker); pr_err("\tskip_check %d\n", vol->skip_check); if (vol->name_len <= UBI_VOL_NAME_MAX && strnlen(vol->name, vol->name_len + 1) == vol->name_len) { pr_err("\tname %s\n", vol->name); } else { pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", vol->name[0], vol->name[1], vol->name[2], vol->name[3], vol->name[4]); } } /** * ubi_dump_vtbl_record - dump a &struct ubi_vtbl_record object. * @r: the object to dump * @idx: volume table index */ void ubi_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { int name_len = be16_to_cpu(r->name_len); pr_err("Volume table record %d dump:\n", idx); pr_err("\treserved_pebs %d\n", be32_to_cpu(r->reserved_pebs)); pr_err("\talignment %d\n", be32_to_cpu(r->alignment)); pr_err("\tdata_pad %d\n", be32_to_cpu(r->data_pad)); pr_err("\tvol_type %d\n", (int)r->vol_type); pr_err("\tupd_marker %d\n", (int)r->upd_marker); pr_err("\tname_len %d\n", name_len); if (r->name[0] == '\0') { pr_err("\tname NULL\n"); return; } if (name_len <= UBI_VOL_NAME_MAX && strnlen(&r->name[0], name_len + 1) == name_len) { pr_err("\tname %s\n", &r->name[0]); } else { pr_err("\t1st 5 characters of name: %c%c%c%c%c\n", r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } pr_err("\tcrc %#08x\n", be32_to_cpu(r->crc)); } /** * ubi_dump_av - dump a &struct ubi_ainf_volume object. * @av: the object to dump */ void ubi_dump_av(const struct ubi_ainf_volume *av) { pr_err("Volume attaching information dump:\n"); pr_err("\tvol_id %d\n", av->vol_id); pr_err("\thighest_lnum %d\n", av->highest_lnum); pr_err("\tleb_count %d\n", av->leb_count); pr_err("\tcompat %d\n", av->compat); pr_err("\tvol_type %d\n", av->vol_type); pr_err("\tused_ebs %d\n", av->used_ebs); pr_err("\tlast_data_size %d\n", av->last_data_size); pr_err("\tdata_pad %d\n", av->data_pad); } /** * ubi_dump_aeb - dump a &struct ubi_ainf_peb object. * @aeb: the object to dump * @type: object type: 0 - not corrupted, 1 - corrupted */ void ubi_dump_aeb(const struct ubi_ainf_peb *aeb, int type) { pr_err("eraseblock attaching information dump:\n"); pr_err("\tec %d\n", aeb->ec); pr_err("\tpnum %d\n", aeb->pnum); if (type == 0) { pr_err("\tlnum %d\n", aeb->lnum); pr_err("\tscrub %d\n", aeb->scrub); pr_err("\tsqnum %llu\n", aeb->sqnum); } } /** * ubi_dump_mkvol_req - dump a &struct ubi_mkvol_req object. * @req: the object to dump */ void ubi_dump_mkvol_req(const struct ubi_mkvol_req *req) { char nm[17]; pr_err("Volume creation request dump:\n"); pr_err("\tvol_id %d\n", req->vol_id); pr_err("\talignment %d\n", req->alignment); pr_err("\tbytes %lld\n", (long long)req->bytes); pr_err("\tvol_type %d\n", req->vol_type); pr_err("\tname_len %d\n", req->name_len); memcpy(nm, req->name, 16); nm[16] = 0; pr_err("\t1st 16 characters of name: %s\n", nm); } /* * Root directory for UBI stuff in debugfs. 
Contains sub-directories which * contain the stuff specific to particular UBI devices. */ static struct dentry *dfs_rootdir; #ifdef CONFIG_MTD_UBI_FAULT_INJECTION static void dfs_create_fault_entry(struct dentry *parent) { struct dentry *dir; dir = debugfs_create_dir("fault_inject", parent); if (IS_ERR_OR_NULL(dir)) { int err = dir ? PTR_ERR(dir) : -ENODEV; pr_warn("UBI error: cannot create \"fault_inject\" debugfs directory, error %d\n", err); return; } fault_create_debugfs_attr("emulate_eccerr", dir, &fault_eccerr_attr); fault_create_debugfs_attr("emulate_read_failure", dir, &fault_read_failure_attr); fault_create_debugfs_attr("emulate_bitflips", dir, &fault_bitflips_attr); fault_create_debugfs_attr("emulate_write_failure", dir, &fault_write_failure_attr); fault_create_debugfs_attr("emulate_erase_failure", dir, &fault_erase_failure_attr); fault_create_debugfs_attr("emulate_power_cut", dir, &fault_power_cut_attr); fault_create_debugfs_attr("emulate_io_ff", dir, &fault_io_ff_attr); fault_create_debugfs_attr("emulate_io_ff_bitflips", dir, &fault_io_ff_bitflips_attr); fault_create_debugfs_attr("emulate_bad_hdr", dir, &fault_bad_hdr_attr); fault_create_debugfs_attr("emulate_bad_hdr_ebadmsg", dir, &fault_bad_hdr_ebadmsg_attr); } #endif /** * ubi_debugfs_init - create UBI debugfs directory. * * Create UBI debugfs directory. Returns zero in case of success and a negative * error code in case of failure. */ int ubi_debugfs_init(void) { if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; dfs_rootdir = debugfs_create_dir("ubi", NULL); if (IS_ERR_OR_NULL(dfs_rootdir)) { int err = dfs_rootdir ? PTR_ERR(dfs_rootdir) : -ENODEV; pr_err("UBI error: cannot create \"ubi\" debugfs directory, error %d\n", err); return err; } #ifdef CONFIG_MTD_UBI_FAULT_INJECTION dfs_create_fault_entry(dfs_rootdir); #endif return 0; } /** * ubi_debugfs_exit - remove UBI debugfs directory. 
*/ void ubi_debugfs_exit(void) { if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove(dfs_rootdir); } /* Read an UBI debugfs file */ static ssize_t dfs_file_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { unsigned long ubi_num = (unsigned long)file->private_data; struct dentry *dent = file->f_path.dentry; struct ubi_device *ubi; struct ubi_debug_info *d; char buf[16]; int val; ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; d = &ubi->dbg; if (dent == d->dfs_chk_gen) val = d->chk_gen; else if (dent == d->dfs_chk_io) val = d->chk_io; else if (dent == d->dfs_chk_fastmap) val = d->chk_fastmap; else if (dent == d->dfs_disable_bgt) val = d->disable_bgt; else if (dent == d->dfs_emulate_bitflips) val = d->emulate_bitflips; else if (dent == d->dfs_emulate_io_failures) val = d->emulate_io_failures; else if (dent == d->dfs_emulate_failures) { snprintf(buf, sizeof(buf), "0x%04x\n", d->emulate_failures); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_emulate_power_cut) { snprintf(buf, sizeof(buf), "%u\n", d->emulate_power_cut); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_power_cut_min) { snprintf(buf, sizeof(buf), "%u\n", d->power_cut_min); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else if (dent == d->dfs_power_cut_max) { snprintf(buf, sizeof(buf), "%u\n", d->power_cut_max); count = simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf)); goto out; } else { count = -EINVAL; goto out; } if (val) buf[0] = '1'; else buf[0] = '0'; buf[1] = '\n'; buf[2] = 0x00; count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); out: ubi_put_device(ubi); return count; } /* Write an UBI debugfs file */ static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { unsigned long ubi_num = (unsigned long)file->private_data; struct dentry *dent = file->f_path.dentry; struct ubi_device *ubi; struct ubi_debug_info *d; size_t buf_size; char buf[16] = {0}; int val; ubi = ubi_get_device(ubi_num); if (!ubi) return -ENODEV; d = &ubi->dbg; buf_size = min_t(size_t, count, (sizeof(buf) - 1)); if (copy_from_user(buf, user_buf, buf_size)) { count = -EFAULT; goto out; } if (dent == d->dfs_emulate_failures) { if (kstrtouint(buf, 0, &d->emulate_failures) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_power_cut_min) { if (kstrtouint(buf, 0, &d->power_cut_min) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_power_cut_max) { if (kstrtouint(buf, 0, &d->power_cut_max) != 0) count = -EINVAL; goto out; } else if (dent == d->dfs_emulate_power_cut) { if (kstrtoint(buf, 0, &val) != 0) count = -EINVAL; else d->emulate_power_cut = val; goto out; } if (buf[0] == '1') val = 1; else if (buf[0] == '0') val = 0; else { count = -EINVAL; goto out; } if (dent == d->dfs_chk_gen) d->chk_gen = val; else if (dent == d->dfs_chk_io) d->chk_io = val; else if (dent == d->dfs_chk_fastmap) d->chk_fastmap = val; else if (dent == d->dfs_disable_bgt) d->disable_bgt = val; else if (dent == d->dfs_emulate_bitflips) d->emulate_bitflips = val; else if (dent == d->dfs_emulate_io_failures) d->emulate_io_failures = val; else count = -EINVAL; out: ubi_put_device(ubi); return count; } /* File operations for all UBI debugfs files except * detailed_erase_block_info */ static const struct file_operations dfs_fops = { .read = dfs_file_read, .write = dfs_file_write, .open = simple_open, .owner = 
THIS_MODULE, }; /* As long as the position is less then that total number of erase blocks, * we still have more to print. */ static void *eraseblk_count_seq_start(struct seq_file *s, loff_t *pos) { struct ubi_device *ubi = s->private; if (*pos < ubi->peb_count) return pos; return NULL; } /* Since we are using the position as the iterator, we just need to check if we * are done and increment the position. */ static void *eraseblk_count_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct ubi_device *ubi = s->private; (*pos)++; if (*pos < ubi->peb_count) return pos; return NULL; } static void eraseblk_count_seq_stop(struct seq_file *s, void *v) { } static int eraseblk_count_seq_show(struct seq_file *s, void *iter) { struct ubi_device *ubi = s->private; struct ubi_wl_entry *wl; int *block_number = iter; int erase_count = -1; int err; /* If this is the start, print a header */ if (*block_number == 0) seq_puts(s, "physical_block_number\terase_count\n"); err = ubi_io_is_bad(ubi, *block_number); if (err) return err; spin_lock(&ubi->wl_lock); wl = ubi->lookuptbl[*block_number]; if (wl) erase_count = wl->ec; spin_unlock(&ubi->wl_lock); if (erase_count < 0) return 0; seq_printf(s, "%-22d\t%-11d\n", *block_number, erase_count); return 0; } static const struct seq_operations eraseblk_count_seq_ops = { .start = eraseblk_count_seq_start, .next = eraseblk_count_seq_next, .stop = eraseblk_count_seq_stop, .show = eraseblk_count_seq_show }; static int eraseblk_count_open(struct inode *inode, struct file *f) { struct seq_file *s; int err; err = seq_open(f, &eraseblk_count_seq_ops); if (err) return err; s = f->private_data; s->private = ubi_get_device((unsigned long)inode->i_private); if (!s->private) return -ENODEV; else return 0; } static int eraseblk_count_release(struct inode *inode, struct file *f) { struct seq_file *s = f->private_data; struct ubi_device *ubi = s->private; ubi_put_device(ubi); return seq_release(inode, f); } static const struct file_operations eraseblk_count_fops = { .owner = THIS_MODULE, .open = eraseblk_count_open, .read = seq_read, .llseek = seq_lseek, .release = eraseblk_count_release, }; /** * ubi_debugfs_init_dev - initialize debugfs for an UBI device. * @ubi: UBI device description object * * This function creates all debugfs files for UBI device @ubi. Returns zero in * case of success and a negative error code in case of failure. 
*/ int ubi_debugfs_init_dev(struct ubi_device *ubi) { unsigned long ubi_num = ubi->ubi_num; struct ubi_debug_info *d = &ubi->dbg; umode_t mode = S_IRUSR | S_IWUSR; int n; if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN, UBI_DFS_DIR_NAME, ubi->ubi_num); if (n >= UBI_DFS_DIR_LEN) { /* The array size is too small */ return -EINVAL; } d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); d->dfs_chk_gen = debugfs_create_file("chk_gen", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_chk_io = debugfs_create_file("chk_io", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_chk_fastmap = debugfs_create_file("chk_fastmap", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_disable_bgt = debugfs_create_file("tst_disable_bgt", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_bitflips = debugfs_create_file("tst_emulate_bitflips", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_io_failures = debugfs_create_file("tst_emulate_io_failures", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_emulate_power_cut = debugfs_create_file("tst_emulate_power_cut", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_power_cut_min = debugfs_create_file("tst_emulate_power_cut_min", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); d->dfs_power_cut_max = debugfs_create_file("tst_emulate_power_cut_max", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); debugfs_create_file("detailed_erase_block_info", S_IRUSR, d->dfs_dir, (void *)ubi_num, &eraseblk_count_fops); #ifdef CONFIG_MTD_UBI_FAULT_INJECTION d->dfs_emulate_failures = debugfs_create_file("emulate_failures", mode, d->dfs_dir, (void *)ubi_num, &dfs_fops); #endif return 0; } /** * ubi_debugfs_exit_dev - free all debugfs files corresponding to device @ubi * @ubi: UBI device description object */ void ubi_debugfs_exit_dev(struct ubi_device *ubi) { if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove_recursive(ubi->dbg.dfs_dir); } /** * ubi_dbg_power_cut - emulate a power cut if it is time to do so * @ubi: UBI device description object * @caller: Flags set to indicate from where the function is being called * * Returns non-zero if a power cut was emulated, zero if not. */ int ubi_dbg_power_cut(struct ubi_device *ubi, int caller) { unsigned int range; if ((ubi->dbg.emulate_power_cut & caller) == 0) return 0; if (ubi->dbg.power_cut_counter == 0) { ubi->dbg.power_cut_counter = ubi->dbg.power_cut_min; if (ubi->dbg.power_cut_max > ubi->dbg.power_cut_min) { range = ubi->dbg.power_cut_max - ubi->dbg.power_cut_min; ubi->dbg.power_cut_counter += get_random_u32_below(range); } return 0; } ubi->dbg.power_cut_counter--; if (ubi->dbg.power_cut_counter) return 0; return 1; } |
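/*
 * Illustrative sketch (not part of drivers/mtd/ubi/debug.c): how the
 * should_fail_*() helpers generated by FAIL_ACTION() above are typically
 * consulted from an I/O path.  example_emulated_read() and its error-code
 * choices are hypothetical; only should_fail_eccerr() and
 * should_fail_bitflips() come from this file.
 */
#ifdef CONFIG_MTD_UBI_FAULT_INJECTION
static int example_emulated_read(struct ubi_device *ubi, void *buf,
				 int pnum, int offset, int len)
{
	/* Pretend the flash reported an uncorrectable ECC error. */
	if (should_fail_eccerr())
		return -EBADMSG;

	/*
	 * Pretend correctable bitflips were seen; -EUCLEAN is what mtd_read()
	 * returns in that case, so callers would schedule scrubbing.
	 */
	if (should_fail_bitflips())
		return -EUCLEAN;

	/* A real read path would call mtd_read(ubi->mtd, ...) here. */
	return 0;
}
#endif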
// SPDX-License-Identifier: GPL-2.0-only #include <net/netdev_queues.h> #include "netlink.h" #include "common.h" struct rings_req_info { struct ethnl_req_info base; }; struct rings_reply_data { struct ethnl_reply_data base; struct ethtool_ringparam ringparam; struct kernel_ethtool_ringparam kernel_ringparam; u32 supported_ring_params; }; #define RINGS_REPDATA(__reply_base) \ container_of(__reply_base, struct rings_reply_data, base) const struct nla_policy ethnl_rings_get_policy[] = { [ETHTOOL_A_RINGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int rings_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct rings_reply_data *data = RINGS_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; data->supported_ring_params = dev->ethtool_ops->supported_ring_params; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->kernel_ringparam.tcp_data_split = dev->cfg->hds_config; data->kernel_ringparam.hds_thresh = dev->cfg->hds_thresh; dev->ethtool_ops->get_ringparam(dev, &data->ringparam, &data->kernel_ringparam, info->extack); ethnl_ops_complete(dev); return 0; } static int rings_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u32)) + /* _RINGS_RX_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ nla_total_size(sizeof(u32)) + /* _RINGS_TX */ nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ nla_total_size(sizeof(u32)) + /* _RINGS_CQE_SIZE */ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ nla_total_size(sizeof(u8)) + /* _RINGS_RX_PUSH */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */ nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */
nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/ } static int rings_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rings_reply_data *data = RINGS_REPDATA(reply_base); const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam; const struct ethtool_ringparam *ringparam = &data->ringparam; u32 supported_ring_params = data->supported_ring_params; WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED); if ((ringparam->rx_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, ringparam->rx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX, ringparam->rx_pending))) || (ringparam->rx_mini_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI_MAX, ringparam->rx_mini_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI, ringparam->rx_mini_pending))) || (ringparam->rx_jumbo_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO_MAX, ringparam->rx_jumbo_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO, ringparam->rx_jumbo_pending))) || (ringparam->tx_max_pending && (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, ringparam->tx_max_pending) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX, ringparam->tx_pending))) || (kr->rx_buf_len && (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_BUF_LEN, kr->rx_buf_len))) || (kr->tcp_data_split && (nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, kr->tcp_data_split))) || (kr->cqe_size && (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) || nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push) || ((supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN) && (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, kr->tx_push_buf_max_len) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, kr->tx_push_buf_len))) || ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) && (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH, kr->hds_thresh) || nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX, kr->hds_thresh_max)))) return -EMSGSIZE; return 0; } /* RINGS_SET */ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_RINGS_RX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = NLA_POLICY_MAX(NLA_U8, ETHTOOL_TCP_DATA_SPLIT_ENABLED), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 }, }; static int ethnl_set_rings_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; struct nlattr **tb = info->attrs; if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] && !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], "setting rx buf len not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TCP_DATA_SPLIT)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], "setting TCP data split is not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_HDS_THRESH] && !(ops->supported_ring_params & 
ETHTOOL_RING_USE_HDS_THRS)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_HDS_THRESH], "setting hds-thresh is not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_CQE_SIZE], "setting cqe size not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TX_PUSH] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH], "setting tx push not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_RX_PUSH] && !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_PUSH)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_RX_PUSH], "setting rx push not supported"); return -EOPNOTSUPP; } if (tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] && !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], "setting tx push buf len is not supported"); return -EOPNOTSUPP; } return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP; } static int ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) { struct kernel_ethtool_ringparam kernel_ringparam = {}; struct ethtool_ringparam ringparam = {}; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; const struct nlattr *err_attr; bool mod = false; int ret; dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); kernel_ringparam.tcp_data_split = dev->cfg->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, tb[ETHTOOL_A_RINGS_RX_MINI], &mod); ethnl_update_u32(&ringparam.rx_jumbo_pending, tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); ethnl_update_u32(&kernel_ringparam.rx_buf_len, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); ethnl_update_u8(&kernel_ringparam.tcp_data_split, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); ethnl_update_u8(&kernel_ringparam.tx_push, tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); ethnl_update_u8(&kernel_ringparam.rx_push, tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); ethnl_update_u32(&kernel_ringparam.hds_thresh, tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod); if (!mod) return 0; if (kernel_ringparam.tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && dev_xdp_sb_prog_count(dev)) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], "tcp-data-split can not be enabled with single buffer XDP"); return -EINVAL; } if (dev_get_min_mp_channel_count(dev)) { if (kernel_ringparam.tcp_data_split != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(info->extack, "can't disable tcp-data-split while device has memory provider enabled"); return -EINVAL; } else if (kernel_ringparam.hds_thresh) { NL_SET_ERR_MSG(info->extack, "can't set non-zero hds_thresh while device is memory provider enabled"); return -EINVAL; } } /* ensure new ring parameters are within limits */ if (ringparam.rx_pending > ringparam.rx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX]; else if (ringparam.rx_mini_pending > ringparam.rx_mini_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX_MINI]; else if (ringparam.rx_jumbo_pending > ringparam.rx_jumbo_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; else if (ringparam.tx_pending > 
ringparam.tx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_TX]; else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max) err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH]; else err_attr = NULL; if (err_attr) { NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested ring size exceeds maximum"); return -EINVAL; } if (kernel_ringparam.tx_push_buf_len > kernel_ringparam.tx_push_buf_max_len) { NL_SET_ERR_MSG_ATTR_FMT(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], "Requested TX push buffer exceeds the maximum of %u", kernel_ringparam.tx_push_buf_max_len); return -EINVAL; } dev->cfg_pending->hds_config = kernel_ringparam.tcp_data_split; dev->cfg_pending->hds_thresh = kernel_ringparam.hds_thresh; ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_rings_request_ops = { .request_cmd = ETHTOOL_MSG_RINGS_GET, .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY, .hdr_attr = ETHTOOL_A_RINGS_HEADER, .req_info_size = sizeof(struct rings_req_info), .reply_data_size = sizeof(struct rings_reply_data), .prepare_data = rings_prepare_data, .reply_size = rings_reply_size, .fill_reply = rings_fill_reply, .set_validate = ethnl_set_rings_validate, .set = ethnl_set_rings, .set_ntf_cmd = ETHTOOL_MSG_RINGS_NTF, }; |
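/*
 * Illustrative sketch (not part of net/ethtool/rings.c): the shape of the
 * driver-side ->get_ringparam()/->set_ringparam() callbacks that
 * rings_prepare_data() and ethnl_set_rings() above invoke.  "example_priv",
 * its fields and the 4096 maximum are made up; only the ethtool_ops callback
 * signatures and the core's min/max validation shown above are real.
 */
struct example_priv {
	u32 rx_ring_size;
	u32 tx_ring_size;
};

static void example_get_ringparam(struct net_device *dev,
				  struct ethtool_ringparam *ring,
				  struct kernel_ethtool_ringparam *kernel_ring,
				  struct netlink_ext_ack *extack)
{
	struct example_priv *priv = netdev_priv(dev);

	/* Report maxima first; ethnl_set_rings() checks requests against them. */
	ring->rx_max_pending = 4096;
	ring->tx_max_pending = 4096;
	ring->rx_pending = priv->rx_ring_size;
	ring->tx_pending = priv->tx_ring_size;
}

static int example_set_ringparam(struct net_device *dev,
				 struct ethtool_ringparam *ring,
				 struct kernel_ethtool_ringparam *kernel_ring,
				 struct netlink_ext_ack *extack)
{
	struct example_priv *priv = netdev_priv(dev);

	/* Values were already validated against the maxima reported above. */
	priv->rx_ring_size = ring->rx_pending;
	priv->tx_ring_size = ring->tx_pending;
	return 0;
}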
// SPDX-License-Identifier: GPL-2.0 /* * NETLINK Netlink attributes * * Authors: Thomas Graf <tgraf@suug.ch> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> #include <linux/nospec.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> #include <net/netlink.h> /* For these data types, attribute length should be exactly the given * size. However, to maintain compatibility with broken commands, if the * attribute length does not match the expected size a warning is emitted * to the user that the command is sending invalid data and needs to be fixed. */ static const u8 nla_attr_len[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), [NLA_MSECS] = sizeof(u64), [NLA_NESTED] = NLA_HDRLEN, [NLA_S8] = sizeof(s8), [NLA_S16] = sizeof(s16), [NLA_S32] = sizeof(s32), [NLA_S64] = sizeof(s64), [NLA_BE16] = sizeof(__be16), [NLA_BE32] = sizeof(__be32), }; /* * Nested policies might refer back to the original * policy in some cases, and userspace could try to * abuse that and recurse by nesting in the right * ways. Limit recursion to avoid this problem.
*/ #define MAX_POLICY_RECURSION_DEPTH 10 static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth); static int validate_nla_bitfield32(const struct nlattr *nla, const u32 valid_flags_mask) { const struct nla_bitfield32 *bf = nla_data(nla); if (!valid_flags_mask) return -EINVAL; /*disallow invalid bit selector */ if (bf->selector & ~valid_flags_mask) return -EINVAL; /*disallow invalid bit values */ if (bf->value & ~valid_flags_mask) return -EINVAL; /*disallow valid bit values that are not selected*/ if (bf->value & ~bf->selector) return -EINVAL; return 0; } static int nla_validate_array(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack, unsigned int validate, unsigned int depth) { const struct nlattr *entry; int rem; nla_for_each_attr(entry, head, len, rem) { int ret; if (nla_len(entry) == 0) continue; if (nla_len(entry) < NLA_HDRLEN) { NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy, "Array element too short"); return -ERANGE; } ret = __nla_validate_parse(nla_data(entry), nla_len(entry), maxtype, policy, validate, extack, NULL, depth + 1); if (ret < 0) return ret; } return 0; } void nla_get_range_unsigned(const struct nla_policy *pt, struct netlink_range_validation *range) { WARN_ON_ONCE(pt->validation_type != NLA_VALIDATE_RANGE_PTR && (pt->min < 0 || pt->max < 0)); range->min = 0; switch (pt->type) { case NLA_U8: range->max = U8_MAX; break; case NLA_U16: case NLA_BE16: case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: case NLA_BE32: range->max = U32_MAX; break; case NLA_U64: case NLA_UINT: case NLA_MSECS: range->max = U64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_range_unsigned(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { struct netlink_range_validation range; u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_MSECS: value = nla_get_u64(nla); break; case NLA_BINARY: value = nla_len(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } nla_get_range_unsigned(pt, &range); if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG && pt->type == NLA_BINARY && value > range.max) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, pt->type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } /* this assumes min <= max (don't validate against min) */ return 0; } if (value < range.min || value > range.max) { bool binary = pt->type == NLA_BINARY; if (binary) NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "binary attribute size out of range"); else NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; 
} return 0; } void nla_get_range_signed(const struct nla_policy *pt, struct netlink_range_validation_signed *range) { switch (pt->type) { case NLA_S8: range->min = S8_MIN; range->max = S8_MAX; break; case NLA_S16: range->min = S16_MIN; range->max = S16_MAX; break; case NLA_S32: range->min = S32_MIN; range->max = S32_MAX; break; case NLA_S64: case NLA_SINT: range->min = S64_MIN; range->max = S64_MAX; break; default: WARN_ON_ONCE(1); return; } switch (pt->validation_type) { case NLA_VALIDATE_RANGE: range->min = pt->min; range->max = pt->max; break; case NLA_VALIDATE_RANGE_PTR: *range = *pt->range_signed; break; case NLA_VALIDATE_MIN: range->min = pt->min; break; case NLA_VALIDATE_MAX: range->max = pt->max; break; default: break; } } static int nla_validate_int_range_signed(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct netlink_range_validation_signed range; s64 value; switch (pt->type) { case NLA_S8: value = nla_get_s8(nla); break; case NLA_S16: value = nla_get_s16(nla); break; case NLA_S32: value = nla_get_s32(nla); break; case NLA_S64: value = nla_get_s64(nla); break; case NLA_SINT: value = nla_get_sint(nla); break; default: return -EINVAL; } nla_get_range_signed(pt, &range); if (value < range.min || value > range.max) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "integer out of range"); return -ERANGE; } return 0; } static int nla_validate_int_range(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack, unsigned int validate) { switch (pt->type) { case NLA_U8: case NLA_U16: case NLA_U32: case NLA_U64: case NLA_UINT: case NLA_MSECS: case NLA_BINARY: case NLA_BE16: case NLA_BE32: return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: case NLA_S32: case NLA_S64: case NLA_SINT: return nla_validate_int_range_signed(pt, nla, extack); default: WARN_ON(1); return -EINVAL; } } static int nla_validate_mask(const struct nla_policy *pt, const struct nlattr *nla, struct netlink_ext_ack *extack) { u64 value; switch (pt->type) { case NLA_U8: value = nla_get_u8(nla); break; case NLA_U16: value = nla_get_u16(nla); break; case NLA_U32: value = nla_get_u32(nla); break; case NLA_U64: value = nla_get_u64(nla); break; case NLA_UINT: value = nla_get_uint(nla); break; case NLA_BE16: value = ntohs(nla_get_be16(nla)); break; case NLA_BE32: value = ntohl(nla_get_be32(nla)); break; default: return -EINVAL; } if (value & ~(u64)pt->mask) { NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set"); return -EINVAL; } return 0; } static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, unsigned int depth) { u16 strict_start_type = policy[0].strict_start_type; const struct nla_policy *pt; int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); int err = -ERANGE; if (strict_start_type && type >= strict_start_type) validate |= NL_VALIDATE_STRICT; if (type <= 0 || type > maxtype) return 0; type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } } if (validate & NL_VALIDATE_NESTED) { if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && !(nla->nla_type & NLA_F_NESTED)) { 
NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED is missing"); return -EINVAL; } if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "NLA_F_NESTED not expected"); return -EINVAL; } } switch (pt->type) { case NLA_REJECT: if (extack && pt->reject_message) { NL_SET_BAD_ATTR(extack, nla); extack->_msg = pt->reject_message; return -EINVAL; } err = -EINVAL; goto out_err; case NLA_FLAG: if (attrlen > 0) goto out_err; break; case NLA_SINT: case NLA_UINT: if (attrlen != sizeof(u32) && attrlen != sizeof(u64)) { NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "invalid attribute length"); return -EINVAL; } break; case NLA_BITFIELD32: if (attrlen != sizeof(struct nla_bitfield32)) goto out_err; err = validate_nla_bitfield32(nla, pt->bitfield32_valid); if (err) goto out_err; break; case NLA_NUL_STRING: if (pt->len) minlen = min_t(int, attrlen, pt->len + 1); else minlen = attrlen; if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) { err = -EINVAL; goto out_err; } fallthrough; case NLA_STRING: if (attrlen < 1) goto out_err; if (pt->len) { char *buf = nla_data(nla); if (buf[attrlen - 1] == '\0') attrlen--; if (attrlen > pt->len) goto out_err; } break; case NLA_BINARY: if (pt->len && attrlen > pt->len) goto out_err; break; case NLA_NESTED: /* a nested attributes is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. */ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { err = __nla_validate_parse(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, validate, extack, NULL, depth + 1); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_NESTED_ARRAY: /* a nested array attribute is allowed to be empty; if its not, * it must have a size of at least NLA_HDRLEN. 
*/ if (attrlen == 0) break; if (attrlen < NLA_HDRLEN) goto out_err; if (pt->nested_policy) { int err; err = nla_validate_array(nla_data(nla), nla_len(nla), pt->len, pt->nested_policy, extack, validate, depth); if (err < 0) { /* * return directly to preserve the inner * error message/attribute pointer */ return err; } } break; case NLA_UNSPEC: if (validate & NL_VALIDATE_UNSPEC) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unsupported attribute"); return -EINVAL; } if (attrlen < pt->len) goto out_err; break; default: if (pt->len) minlen = pt->len; else minlen = nla_attr_minlen[pt->type]; if (attrlen < minlen) goto out_err; } /* further validation */ switch (pt->validation_type) { case NLA_VALIDATE_NONE: /* nothing to do */ break; case NLA_VALIDATE_RANGE_PTR: case NLA_VALIDATE_RANGE: case NLA_VALIDATE_RANGE_WARN_TOO_LONG: case NLA_VALIDATE_MIN: case NLA_VALIDATE_MAX: err = nla_validate_int_range(pt, nla, extack, validate); if (err) return err; break; case NLA_VALIDATE_MASK: err = nla_validate_mask(pt, nla, extack); if (err) return err; break; case NLA_VALIDATE_FUNCTION: if (pt->validate) { err = pt->validate(nla, extack); if (err) return err; } break; } return 0; out_err: NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, "Attribute failed policy validation"); return err; } static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, struct nlattr **tb, unsigned int depth) { const struct nlattr *nla; int rem; if (depth >= MAX_POLICY_RECURSION_DEPTH) { NL_SET_ERR_MSG(extack, "allowed policy recursion depth exceeded"); return -EINVAL; } if (tb) memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); nla_for_each_attr(nla, head, len, rem) { u16 type = nla_type(nla); if (type == 0 || type > maxtype) { if (validate & NL_VALIDATE_MAXTYPE) { NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute type"); return -EINVAL; } continue; } type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); if (err < 0) return err; } if (tb) tb[type] = (struct nlattr *)nla; } if (unlikely(rem > 0)) { pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", rem, current->comm); NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes"); if (validate & NL_VALIDATE_TRAILING) return -EINVAL; } return 0; } /** * __nla_validate - Validate a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @maxtype: maximum attribute type to be expected * @policy: validation policy * @validate: validation strictness * @extack: extended ACK report struct * * Validates all attributes in the specified attribute stream against the * specified policy. Validation depends on the validate flags passed, see * &enum netlink_validation for more details on that. * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ int __nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, NULL, 0); } EXPORT_SYMBOL(__nla_validate); /** * nla_policy_len - Determine the max. length of a policy * @p: policy to use * @n: number of policies * * Determines the max. length of the policy. It is currently used * to allocated Netlink buffers roughly the size of the actual * message. 
* * Returns 0 on success or a negative error code. */ int nla_policy_len(const struct nla_policy *p, int n) { int i, len = 0; for (i = 0; i < n; i++, p++) { if (p->len) len += nla_total_size(p->len); else if (nla_attr_len[p->type]) len += nla_total_size(nla_attr_len[p->type]); else if (nla_attr_minlen[p->type]) len += nla_total_size(nla_attr_minlen[p->type]); } return len; } EXPORT_SYMBOL(nla_policy_len); /** * __nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream * @policy: validation policy * @validate: validation strictness * @extack: extended ACK pointer * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. * Validation is controlled by the @validate parameter. * * Returns 0 on success or a negative error code. */ int __nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack) { return __nla_validate_parse(head, len, maxtype, policy, validate, extack, tb, 0); } EXPORT_SYMBOL(__nla_parse); /** * nla_find - Find a specific attribute in a stream of attributes * @head: head of attribute stream * @len: length of attribute stream * @attrtype: type of attribute to look for * * Returns the first attribute in the stream matching the specified type. */ struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) return (struct nlattr *)nla; return NULL; } EXPORT_SYMBOL(nla_find); /** * nla_strscpy - Copy string attribute payload into a sized buffer * @dst: Where to copy the string to. * @nla: Attribute to copy the string from. * @dstsize: Size of destination buffer. * * Copies at most dstsize - 1 bytes into the destination buffer. * Unlike strscpy() the destination buffer is always padded out. * * Return: * * srclen - Returns @nla length (not including the trailing %NUL). * * -E2BIG - If @dstsize is 0 or greater than U16_MAX or @nla length greater * than @dstsize. */ ssize_t nla_strscpy(char *dst, const struct nlattr *nla, size_t dstsize) { size_t srclen = nla_len(nla); char *src = nla_data(nla); ssize_t ret; size_t len; if (dstsize == 0 || WARN_ON_ONCE(dstsize > U16_MAX)) return -E2BIG; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; if (srclen >= dstsize) { len = dstsize - 1; ret = -E2BIG; } else { len = srclen; ret = len; } memcpy(dst, src, len); /* Zero pad end of dst. */ memset(dst + len, 0, dstsize - len); return ret; } EXPORT_SYMBOL(nla_strscpy); /** * nla_strdup - Copy string attribute payload into a newly allocated buffer * @nla: attribute to copy the string from * @flags: the type of memory to allocate (see kmalloc). * * Returns a pointer to the allocated buffer or NULL on error. 
*/ char *nla_strdup(const struct nlattr *nla, gfp_t flags) { size_t srclen = nla_len(nla); char *src = nla_data(nla), *dst; if (srclen > 0 && src[srclen - 1] == '\0') srclen--; dst = kmalloc(srclen + 1, flags); if (dst != NULL) { memcpy(dst, src, srclen); dst[srclen] = '\0'; } return dst; } EXPORT_SYMBOL(nla_strdup); /** * nla_memcpy - Copy a netlink attribute into another memory area * @dest: where to copy to memcpy * @src: netlink attribute to copy from * @count: size of the destination area * * Note: The number of bytes copied is limited by the length of * attribute's payload. memcpy * * Returns the number of bytes copied. */ int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); return minlen; } EXPORT_SYMBOL(nla_memcpy); /** * nla_memcmp - Compare an attribute with sized memory area * @nla: netlink attribute * @data: memory area * @size: size of memory area */ int nla_memcmp(const struct nlattr *nla, const void *data, size_t size) { int d = nla_len(nla) - size; if (d == 0) d = memcmp(nla_data(nla), data, size); return d; } EXPORT_SYMBOL(nla_memcmp); /** * nla_strcmp - Compare a string attribute against a string * @nla: netlink string attribute * @str: another string */ int nla_strcmp(const struct nlattr *nla, const char *str) { int len = strlen(str); char *buf = nla_data(nla); int attrlen = nla_len(nla); int d; while (attrlen > 0 && buf[attrlen - 1] == '\0') attrlen--; d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); return d; } EXPORT_SYMBOL(nla_strcmp); #ifdef CONFIG_NET /** * __nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { struct nlattr *nla; nla = skb_put(skb, nla_total_size(attrlen)); nla->nla_type = attrtype; nla->nla_len = nla_attr_size(attrlen); memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); return nla; } EXPORT_SYMBOL(__nla_reserve); /** * __nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { nla_align_64bit(skb, padattr); return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(__nla_reserve_64bit); /** * __nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * The caller is responsible to ensure that the skb provides enough * tailroom for the payload. 
*/ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { return skb_put_zero(skb, NLA_ALIGN(attrlen)); } EXPORT_SYMBOL(__nla_reserve_nohdr); /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return NULL; return __nla_reserve(skb, attrtype, attrlen); } EXPORT_SYMBOL(nla_reserve); /** * nla_reserve_64bit - reserve room for attribute on the skb and align it * @skb: socket buffer to reserve room on * @attrtype: attribute type * @attrlen: length of attribute payload * @padattr: attribute type for the padding * * Adds a netlink attribute header to a socket buffer and reserves * room for the payload but does not copy it. It also ensure that this * attribute will have a 64-bit aligned nla_data() area. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute header and payload. */ struct nlattr *nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return NULL; return __nla_reserve_64bit(skb, attrtype, attrlen, padattr); } EXPORT_SYMBOL(nla_reserve_64bit); /** * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * * Returns NULL if the tailroom of the skb is insufficient to store * the attribute payload. */ void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return NULL; return __nla_reserve_nohdr(skb, attrlen); } EXPORT_SYMBOL(nla_reserve_nohdr); /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. */ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { struct nlattr *nla; nla = __nla_reserve(skb, attrtype, attrlen); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put); /** * __nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute header and payload. 
*/ void __nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { struct nlattr *nla; nla = __nla_reserve_64bit(skb, attrtype, attrlen, padattr); memcpy(nla_data(nla), data, attrlen); } EXPORT_SYMBOL(__nla_put_64bit); /** * __nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * The caller is responsible to ensure that the skb provides enough * tailroom for the attribute payload. */ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { void *start; start = __nla_reserve_nohdr(skb, attrlen); memcpy(start, data, attrlen); } EXPORT_SYMBOL(__nla_put_nohdr); /** * nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put); /** * nla_put_64bit - Add a netlink attribute to a socket buffer and align it * @skb: socket buffer to add attribute to * @attrtype: attribute type * @attrlen: length of attribute payload * @data: head of attribute payload * @padattr: attribute type for the padding * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put_64bit(struct sk_buff *skb, int attrtype, int attrlen, const void *data, int padattr) { size_t len; if (nla_need_padding_for_64bit(skb)) len = nla_total_size_64bit(attrlen); else len = nla_total_size(attrlen); if (unlikely(skb_tailroom(skb) < len)) return -EMSGSIZE; __nla_put_64bit(skb, attrtype, attrlen, data, padattr); return 0; } EXPORT_SYMBOL(nla_put_64bit); /** * nla_put_nohdr - Add a netlink attribute without header * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; } EXPORT_SYMBOL(nla_put_nohdr); /** * nla_append - Add a netlink attribute without header or padding * @skb: socket buffer to add attribute to * @attrlen: length of attribute payload * @data: head of attribute payload * * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; skb_put_data(skb, data, attrlen); return 0; } EXPORT_SYMBOL(nla_append); #endif |
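/*
 * Illustrative sketch (not part of lib/nlattr.c): a minimal consumer of the
 * validation/parsing helpers above.  The EXAMPLE_ATTR_* enum, example_policy
 * and example_parse() are hypothetical; nla_parse_nested(), NLA_POLICY_MAX()
 * and nla_get_u32() are the existing wrappers built on __nla_validate_parse().
 */
enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_ID,		/* NLA_U32, limited to 1000 */
	EXAMPLE_ATTR_NAME,		/* NLA_NUL_STRING, at most 15 chars */
	__EXAMPLE_ATTR_MAX,
};
#define EXAMPLE_ATTR_MAX (__EXAMPLE_ATTR_MAX - 1)

static const struct nla_policy example_policy[EXAMPLE_ATTR_MAX + 1] = {
	[EXAMPLE_ATTR_ID]	= NLA_POLICY_MAX(NLA_U32, 1000),
	[EXAMPLE_ATTR_NAME]	= { .type = NLA_NUL_STRING, .len = 15 },
};

static int example_parse(const struct nlattr *nest,
			 struct netlink_ext_ack *extack, u32 *id)
{
	struct nlattr *tb[EXAMPLE_ATTR_MAX + 1];
	int err;

	/*
	 * Strict parse: out-of-range values, unknown attribute types and
	 * trailing bytes are all rejected by __nla_validate_parse() above.
	 */
	err = nla_parse_nested(tb, EXAMPLE_ATTR_MAX, nest, example_policy,
			       extack);
	if (err)
		return err;

	if (!tb[EXAMPLE_ATTR_ID])
		return -EINVAL;

	*id = nla_get_u32(tb[EXAMPLE_ATTR_ID]);
	return 0;
}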
// SPDX-License-Identifier: GPL-2.0-only /* *
vDPA bus. * * Copyright (c) 2020, Red Hat. All rights reserved. * Author: Jason Wang <jasowang@redhat.com> * */ #include <linux/module.h> #include <linux/idr.h> #include <linux/slab.h> #include <linux/vdpa.h> #include <uapi/linux/vdpa.h> #include <net/genetlink.h> #include <linux/mod_devicetable.h> #include <linux/virtio_ids.h> static LIST_HEAD(mdev_head); /* A global rwsem that protects vdpa management device and device level operations. */ static DECLARE_RWSEM(vdpa_dev_lock); static DEFINE_IDA(vdpa_index_ida); void vdpa_set_status(struct vdpa_device *vdev, u8 status) { down_write(&vdev->cf_lock); vdev->config->set_status(vdev, status); up_write(&vdev->cf_lock); } EXPORT_SYMBOL(vdpa_set_status); static struct genl_family vdpa_nl_family; static int vdpa_dev_probe(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); const struct vdpa_config_ops *ops = vdev->config; u32 max_num, min_num = 1; int ret = 0; d->dma_mask = &d->coherent_dma_mask; ret = dma_set_mask_and_coherent(d, DMA_BIT_MASK(64)); if (ret) return ret; max_num = ops->get_vq_num_max(vdev); if (ops->get_vq_num_min) min_num = ops->get_vq_num_min(vdev); if (max_num < min_num) return -EINVAL; if (drv && drv->probe) ret = drv->probe(vdev); return ret; } static void vdpa_dev_remove(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); if (drv && drv->remove) drv->remove(vdev); } static int vdpa_dev_match(struct device *dev, const struct device_driver *drv) { struct vdpa_device *vdev = dev_to_vdpa(dev); /* Check override first, and if set, only use the named driver */ if (vdev->driver_override) return strcmp(vdev->driver_override, drv->name) == 0; /* Currently devices must be supported by all vDPA bus drivers */ return 1; } static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct vdpa_device *vdev = dev_to_vdpa(dev); int ret; ret = driver_set_override(dev, &vdev->driver_override, buf, count); if (ret) return ret; return count; } static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { struct vdpa_device *vdev = dev_to_vdpa(dev); ssize_t len; device_lock(dev); len = sysfs_emit(buf, "%s\n", vdev->driver_override); device_unlock(dev); return len; } static DEVICE_ATTR_RW(driver_override); static struct attribute *vdpa_dev_attrs[] = { &dev_attr_driver_override.attr, NULL, }; static const struct attribute_group vdpa_dev_group = { .attrs = vdpa_dev_attrs, }; __ATTRIBUTE_GROUPS(vdpa_dev); static const struct bus_type vdpa_bus = { .name = "vdpa", .dev_groups = vdpa_dev_groups, .match = vdpa_dev_match, .probe = vdpa_dev_probe, .remove = vdpa_dev_remove, }; static void vdpa_release_dev(struct device *d) { struct vdpa_device *vdev = dev_to_vdpa(d); const struct vdpa_config_ops *ops = vdev->config; if (ops->free) ops->free(vdev); ida_free(&vdpa_index_ida, vdev->index); kfree(vdev->driver_override); kfree(vdev); } /** * __vdpa_alloc_device - allocate and initialize a vDPA device * This allows the driver to do some preparation after the device is * initialized but before it is registered. * @parent: the parent device * @config: the bus operations that are supported by this device * @ngroups: number of groups supported by this device * @nas: number of address spaces supported by this device * @size: size of the parent structure that contains private data * @name: name of the vdpa device; optional.
* @use_va: indicates whether virtual addresses must be used by this device * * Drivers should use the vdpa_alloc_device() wrapper macro instead of * calling this directly. * * Return: Returns an error when parent/config/dma_dev is not set or the ida * allocation fails. */ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, unsigned int ngroups, unsigned int nas, size_t size, const char *name, bool use_va) { struct vdpa_device *vdev; int err = -EINVAL; if (!config) goto err; if (!!config->dma_map != !!config->dma_unmap) goto err; /* It should only work for devices that use an on-chip IOMMU */ if (use_va && !(config->dma_map || config->set_map)) goto err; err = -ENOMEM; vdev = kzalloc(size, GFP_KERNEL); if (!vdev) goto err; err = ida_alloc(&vdpa_index_ida, GFP_KERNEL); if (err < 0) goto err_ida; vdev->dev.bus = &vdpa_bus; vdev->dev.parent = parent; vdev->dev.release = vdpa_release_dev; vdev->index = err; vdev->config = config; vdev->features_valid = false; vdev->use_va = use_va; vdev->ngroups = ngroups; vdev->nas = nas; if (name) err = dev_set_name(&vdev->dev, "%s", name); else err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); if (err) goto err_name; init_rwsem(&vdev->cf_lock); device_initialize(&vdev->dev); return vdev; err_name: ida_free(&vdpa_index_ida, vdev->index); err_ida: kfree(vdev); err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(__vdpa_alloc_device); static int vdpa_name_match(struct device *dev, const void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); return (strcmp(dev_name(&vdev->dev), data) == 0); } static int __vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { struct device *dev; vdev->nvqs = nvqs; lockdep_assert_held(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match); if (dev) { put_device(dev); return -EEXIST; } return device_add(&vdev->dev); } /** * _vdpa_register_device - register a vDPA device with vdpa lock held * Caller must have made a successful call to vdpa_alloc_device() before. * Caller must invoke this routine in the management device dev_add() * callback after setting up valid mgmtdev for this vdpa device. * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * * Return: Returns an error when it fails to add the device to the vDPA bus */ int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { if (!vdev->mdev) return -EINVAL; return __vdpa_register_device(vdev, nvqs); } EXPORT_SYMBOL_GPL(_vdpa_register_device); /** * vdpa_register_device - register a vDPA device * Callers must have made a successful call to vdpa_alloc_device() before. * @vdev: the vdpa device to be registered to vDPA bus * @nvqs: number of virtqueues supported by this device * * Return: Returns an error when it fails to add the device to the vDPA bus */ int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { int err; down_write(&vdpa_dev_lock); err = __vdpa_register_device(vdev, nvqs); up_write(&vdpa_dev_lock); return err; } EXPORT_SYMBOL_GPL(vdpa_register_device); /** * _vdpa_unregister_device - unregister a vDPA device * Caller must invoke this routine as part of management device dev_del() * callback.
* @vdev: the vdpa device to be unregistered from vDPA bus */ void _vdpa_unregister_device(struct vdpa_device *vdev) { lockdep_assert_held(&vdpa_dev_lock); WARN_ON(!vdev->mdev); device_unregister(&vdev->dev); } EXPORT_SYMBOL_GPL(_vdpa_unregister_device); /** * vdpa_unregister_device - unregister a vDPA device * @vdev: the vdpa device to be unregistered from vDPA bus */ void vdpa_unregister_device(struct vdpa_device *vdev) { down_write(&vdpa_dev_lock); device_unregister(&vdev->dev); up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_unregister_device); /** * __vdpa_register_driver - register a vDPA device driver * @drv: the vdpa device driver to be registered * @owner: module owner of the driver * * Return: Returns an error when the registration fails */ int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) { drv->driver.bus = &vdpa_bus; drv->driver.owner = owner; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(__vdpa_register_driver); /** * vdpa_unregister_driver - unregister a vDPA device driver * @drv: the vdpa device driver to be unregistered */ void vdpa_unregister_driver(struct vdpa_driver *drv) { driver_unregister(&drv->driver); } EXPORT_SYMBOL_GPL(vdpa_unregister_driver); /** * vdpa_mgmtdev_register - register a vdpa management device * * @mdev: Pointer to vdpa management device * vdpa_mgmtdev_register() registers a vdpa management device which supports * vdpa device management. * Return: Returns 0 on success, or an error when the required callback ops are not * initialized. */ int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev) { if (!mdev->device || !mdev->ops || !mdev->ops->dev_add || !mdev->ops->dev_del) return -EINVAL; INIT_LIST_HEAD(&mdev->list); down_write(&vdpa_dev_lock); list_add_tail(&mdev->list, &mdev_head); up_write(&vdpa_dev_lock); return 0; } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register); static int vdpa_match_remove(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_mgmt_dev *mdev = vdev->mdev; if (mdev == data) mdev->ops->dev_del(mdev, vdev); return 0; } void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev) { down_write(&vdpa_dev_lock); list_del(&mdev->list); /* Filter out all the entries belonging to this management device and delete them. */ bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove); up_write(&vdpa_dev_lock); } EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); static void vdpa_get_config_unlocked(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { const struct vdpa_config_ops *ops = vdev->config; /* * Config accesses aren't supposed to trigger before features are set. * If it does happen we assume a legacy guest. */ if (!vdev->features_valid) vdpa_set_features_unlocked(vdev, 0); ops->get_config(vdev, offset, buf, len); } /** * vdpa_get_config - Get one or more device configuration fields. * @vdev: vdpa device to operate on * @offset: starting byte offset of the field * @buf: buffer pointer to read to * @len: length of the configuration fields in bytes */ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len) { down_read(&vdev->cf_lock); vdpa_get_config_unlocked(vdev, offset, buf, len); up_read(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_get_config); /** * vdpa_set_config - Set one or more device configuration fields.
* @vdev: vdpa device to operate on * @offset: starting byte offset of the field * @buf: buffer pointer to read from * @length: length of the configuration fields in bytes */ void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, unsigned int length) { down_write(&vdev->cf_lock); vdev->config->set_config(vdev, offset, buf, length); up_write(&vdev->cf_lock); } EXPORT_SYMBOL_GPL(vdpa_set_config); static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev, const char *busname, const char *devname) { /* Bus name is optional for simulated management device, so ignore the * device with bus if bus attribute is provided. */ if ((busname && !mdev->device->bus) || (!busname && mdev->device->bus)) return false; if (!busname && strcmp(dev_name(mdev->device), devname) == 0) return true; if (busname && (strcmp(mdev->device->bus->name, busname) == 0) && (strcmp(dev_name(mdev->device), devname) == 0)) return true; return false; } static struct vdpa_mgmt_dev *vdpa_mgmtdev_get_from_attr(struct nlattr **attrs) { struct vdpa_mgmt_dev *mdev; const char *busname = NULL; const char *devname; if (!attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]) return ERR_PTR(-EINVAL); devname = nla_data(attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]); if (attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]) busname = nla_data(attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]); list_for_each_entry(mdev, &mdev_head, list) { if (mgmtdev_handle_match(mdev, busname, devname)) return mdev; } return ERR_PTR(-ENODEV); } static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mgmt_dev *mdev) { if (mdev->device->bus && nla_put_string(msg, VDPA_ATTR_MGMTDEV_BUS_NAME, mdev->device->bus->name)) return -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, dev_name(mdev->device))) return -EMSGSIZE; return 0; } static u64 vdpa_mgmtdev_get_classes(const struct vdpa_mgmt_dev *mdev, unsigned int *nclasses) { u64 supported_classes = 0; unsigned int n = 0; for (int i = 0; mdev->id_table[i].device; i++) { if (mdev->id_table[i].device > 63) continue; supported_classes |= BIT_ULL(mdev->id_table[i].device); n++; } if (nclasses) *nclasses = n; return supported_classes; } static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg, u32 portid, u32 seq, int flags) { void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW); if (!hdr) return -EMSGSIZE; err = vdpa_nl_mgmtdev_handle_fill(msg, mdev); if (err) goto msg_err; if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES, vdpa_mgmtdev_get_classes(mdev, NULL), VDPA_ATTR_UNSPEC)) { err = -EMSGSIZE; goto msg_err; } if (nla_put_u32(msg, VDPA_ATTR_DEV_MGMTDEV_MAX_VQS, mdev->max_supported_vqs)) { err = -EMSGSIZE; goto msg_err; } if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_SUPPORTED_FEATURES, mdev->supported_features, VDPA_ATTR_PAD)) { err = -EMSGSIZE; goto msg_err; } genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_mgmt_dev *mdev; struct sk_buff *msg; int err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; down_read(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { up_read(&vdpa_dev_lock); NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified mgmt device"); err = PTR_ERR(mdev); goto out; } err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0); up_read(&vdpa_dev_lock); if (err) goto out; err = genlmsg_reply(msg, info); 
return err; out: nlmsg_free(msg); return err; } static int vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_mgmt_dev *mdev; int start = cb->args[0]; int idx = 0; int err; down_read(&vdpa_dev_lock); list_for_each_entry(mdev, &mdev_head, list) { if (idx < start) { idx++; continue; } err = vdpa_mgmtdev_fill(mdev, msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) goto out; idx++; } out: up_read(&vdpa_dev_lock); cb->args[0] = idx; return msg->len; } #define VDPA_DEV_NET_ATTRS_MASK (BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | \ BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) /* * Bitmask for all per-device features: feature bits VIRTIO_TRANSPORT_F_START * through VIRTIO_TRANSPORT_F_END are unset, i.e. 0xfffffc000fffffff for * all 64bit features. If the features are extended beyond 64 bits, or new * "holes" are reserved for other type of features than per-device, this * macro would have to be updated. */ #define VIRTIO_DEVICE_F_MASK (~0ULL << (VIRTIO_TRANSPORT_F_END + 1) | \ ((1ULL << VIRTIO_TRANSPORT_F_START) - 1)) static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_dev_set_config config = {}; struct nlattr **nl_attrs = info->attrs; struct vdpa_mgmt_dev *mdev; unsigned int ncls = 0; const u8 *macaddr; const char *name; u64 classes; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { config.net.mtu = nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); } if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]) { config.net.max_vq_pairs = nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP]); if (!config.net.max_vq_pairs) { NL_SET_ERR_MSG_MOD(info->extack, "At least one pair of VQs is required"); return -EINVAL; } config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); } if (nl_attrs[VDPA_ATTR_DEV_FEATURES]) { u64 missing = 0x0ULL; config.device_features = nla_get_u64(nl_attrs[VDPA_ATTR_DEV_FEATURES]); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR] && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MAC))) missing |= BIT_ULL(VIRTIO_NET_F_MAC); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU] && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MTU))) missing |= BIT_ULL(VIRTIO_NET_F_MTU); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MAX_VQP] && config.net.max_vq_pairs > 1 && !(config.device_features & BIT_ULL(VIRTIO_NET_F_MQ))) missing |= BIT_ULL(VIRTIO_NET_F_MQ); if (missing) { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Missing features 0x%llx for provided attributes", missing); return -EINVAL; } config.mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES); } /* Skip checking capability if user didn't prefer to configure any * device networking attributes. It is likely that user might have used * a device specific method to configure such attributes or using device * default attributes. 
*/ if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; down_write(&vdpa_dev_lock); mdev = vdpa_mgmtdev_get_from_attr(info->attrs); if (IS_ERR(mdev)) { NL_SET_ERR_MSG_MOD(info->extack, "Fail to find the specified management device"); err = PTR_ERR(mdev); goto err; } if ((config.mask & mdev->config_attr_mask) != config.mask) { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Some provided attributes are not supported: 0x%llx", config.mask & ~mdev->config_attr_mask); err = -EOPNOTSUPP; goto err; } classes = vdpa_mgmtdev_get_classes(mdev, &ncls); if (config.mask & VDPA_DEV_NET_ATTRS_MASK && !(classes & BIT_ULL(VIRTIO_ID_NET))) { NL_SET_ERR_MSG_MOD(info->extack, "Network class attributes provided on unsupported management device"); err = -EINVAL; goto err; } if (!(config.mask & VDPA_DEV_NET_ATTRS_MASK) && config.mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES) && classes & BIT_ULL(VIRTIO_ID_NET) && ncls > 1 && config.device_features & VIRTIO_DEVICE_F_MASK) { NL_SET_ERR_MSG_MOD(info->extack, "Management device supports multi-class while device features specified are ambiguous"); err = -EINVAL; goto err; } err = mdev->ops->dev_add(mdev, name, &config); err: up_write(&vdpa_dev_lock); return err; } static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_mgmt_dev *mdev; struct vdpa_device *vdev; struct device *dev; const char *name; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); down_write(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "Only user created device can be deleted by user"); err = -EINVAL; goto mdev_err; } mdev = vdev->mdev; mdev->ops->dev_del(mdev, vdev); mdev_err: put_device(dev); dev_err: up_write(&vdpa_dev_lock); return err; } static int vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { u16 max_vq_size; u16 min_vq_size = 1; u32 device_id; u32 vendor_id; void *hdr; int err; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_NEW); if (!hdr) return -EMSGSIZE; err = vdpa_nl_mgmtdev_handle_fill(msg, vdev->mdev); if (err) goto msg_err; device_id = vdev->config->get_device_id(vdev); vendor_id = vdev->config->get_vendor_id(vdev); max_vq_size = vdev->config->get_vq_num_max(vdev); if (vdev->config->get_vq_num_min) min_vq_size = vdev->config->get_vq_num_min(vdev); err = -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_VENDOR_ID, vendor_id)) goto msg_err; if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs)) goto msg_err; if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size)) goto msg_err; if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size)) goto msg_err; genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if 
(!msg) return -ENOMEM; down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { err = -EINVAL; goto mdev_err; } err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); if (err) goto mdev_err; err = genlmsg_reply(msg, info); put_device(dev); up_read(&vdpa_dev_lock); return err; mdev_err: put_device(dev); err: up_read(&vdpa_dev_lock); nlmsg_free(msg); return err; } struct vdpa_dev_dump_info { struct sk_buff *msg; struct netlink_callback *cb; int start_idx; int idx; }; static int vdpa_dev_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_dev_dump_info *info = data; int err; if (!vdev->mdev) return 0; if (info->idx < info->start_idx) { info->idx++; return 0; } err = vdpa_dev_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack); if (err) return err; info->idx++; return 0; } static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_dev_dump_info info; info.msg = msg; info.cb = cb; info.start_idx = cb->args[0]; info.idx = 0; down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump); up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } static int vdpa_dev_net_mq_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_MQ)) == 0 && (features & BIT_ULL(VIRTIO_NET_F_RSS)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->max_virtqueue_pairs); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); } static int vdpa_dev_net_mtu_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_MTU)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->mtu); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16); } static int vdpa_dev_net_mac_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { if ((features & BIT_ULL(VIRTIO_NET_F_MAC)) == 0) return 0; return nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config->mac), config->mac); } static int vdpa_dev_net_status_config_fill(struct sk_buff *msg, u64 features, const struct virtio_net_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_NET_F_STATUS)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->status); return nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16); } static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_net_config config = {}; u64 features_device; vdev->config->get_config(vdev, 0, &config, sizeof(config)); features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; if (vdpa_dev_net_mtu_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_net_mac_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_net_status_config_fill(msg, features_device, &config)) return -EMSGSIZE; return vdpa_dev_net_mq_config_fill(msg, features_device, &config); } static int vdpa_dev_blk_capacity_config_fill(struct sk_buff *msg, const struct virtio_blk_config *config) { u64 val_u64; val_u64 = 
__virtio64_to_cpu(true, config->capacity); return nla_put_u64_64bit(msg, VDPA_ATTR_DEV_BLK_CFG_CAPACITY, val_u64, VDPA_ATTR_PAD); } static int vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_SIZE_MAX)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->size_max); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32); } /* fill the block size*/ static int vdpa_dev_blk_block_size_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_BLK_SIZE)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->blk_size); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, val_u32); } static int vdpa_dev_blk_seg_max_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_SEG_MAX)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->seg_max); return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, val_u32); } static int vdpa_dev_blk_mq_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u16 val_u16; if ((features & BIT_ULL(VIRTIO_BLK_F_MQ)) == 0) return 0; val_u16 = __virtio16_to_cpu(true, config->num_queues); return nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, val_u16); } static int vdpa_dev_blk_topology_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u16 min_io_size; u32 opt_io_size; if ((features & BIT_ULL(VIRTIO_BLK_F_TOPOLOGY)) == 0) return 0; min_io_size = __virtio16_to_cpu(true, config->min_io_size); opt_io_size = __virtio32_to_cpu(true, config->opt_io_size); if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_PHY_BLK_EXP, config->physical_block_exp)) return -EMSGSIZE; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_ALIGN_OFFSET, config->alignment_offset)) return -EMSGSIZE; if (nla_put_u16(msg, VDPA_ATTR_DEV_BLK_CFG_MIN_IO_SIZE, min_io_size)) return -EMSGSIZE; if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_OPT_IO_SIZE, opt_io_size)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_discard_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_DISCARD)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->max_discard_sectors); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEC, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->max_discard_seg); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_DISCARD_SEG, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->discard_sector_alignment); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN, val_u32)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_write_zeroes_config_fill(struct sk_buff *msg, u64 features, const struct virtio_blk_config *config) { u32 val_u32; if ((features & BIT_ULL(VIRTIO_BLK_F_WRITE_ZEROES)) == 0) return 0; val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_sectors); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, val_u32)) return -EMSGSIZE; val_u32 = __virtio32_to_cpu(true, config->max_write_zeroes_seg); if (nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, val_u32)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features) { u8 ro; ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 
0 : 1; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features) { u8 flush; flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1; if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush)) return -EMSGSIZE; return 0; } static int vdpa_dev_blk_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) { struct virtio_blk_config config = {}; u64 features_device; vdev->config->get_config(vdev, 0, &config, sizeof(config)); features_device = vdev->config->get_device_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; if (vdpa_dev_blk_capacity_config_fill(msg, &config)) return -EMSGSIZE; if (vdpa_dev_blk_seg_size_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_block_size_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_seg_max_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_mq_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_topology_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_discard_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_write_zeroes_config_fill(msg, features_device, &config)) return -EMSGSIZE; if (vdpa_dev_blk_ro_config_fill(msg, features_device)) return -EMSGSIZE; if (vdpa_dev_blk_flush_config_fill(msg, features_device)) return -EMSGSIZE; return 0; } static int vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { u64 features_driver; u8 status = 0; u32 device_id; void *hdr; int err; down_read(&vdev->cf_lock); hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_CONFIG_GET); if (!hdr) { err = -EMSGSIZE; goto out; } if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { err = -EMSGSIZE; goto msg_err; } device_id = vdev->config->get_device_id(vdev); if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { err = -EMSGSIZE; goto msg_err; } /* only read driver features after the feature negotiation is done */ status = vdev->config->get_status(vdev); if (status & VIRTIO_CONFIG_S_FEATURES_OK) { features_driver = vdev->config->get_driver_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features_driver, VDPA_ATTR_PAD)) { err = -EMSGSIZE; goto msg_err; } } switch (device_id) { case VIRTIO_ID_NET: err = vdpa_dev_net_config_fill(vdev, msg); break; case VIRTIO_ID_BLOCK: err = vdpa_dev_blk_config_fill(vdev, msg); break; default: err = -EOPNOTSUPP; break; } if (err) goto msg_err; up_read(&vdev->cf_lock); genlmsg_end(msg, hdr); return 0; msg_err: genlmsg_cancel(msg, hdr); out: up_read(&vdev->cf_lock); return err; } static int vdpa_fill_stats_rec(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { struct virtio_net_config config = {}; u64 features; u8 status; int err; status = vdev->config->get_status(vdev); if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) { NL_SET_ERR_MSG_MOD(info->extack, "feature negotiation not complete"); return -EAGAIN; } vdpa_get_config_unlocked(vdev, 0, &config, sizeof(config)); features = vdev->config->get_driver_features(vdev); if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_NEGOTIATED_FEATURES, features, VDPA_ATTR_PAD)) return -EMSGSIZE; err = vdpa_dev_net_mq_config_fill(msg, features, &config); if (err) return err; if (nla_put_u32(msg, 
VDPA_ATTR_DEV_QUEUE_INDEX, index)) return -EMSGSIZE; err = vdev->config->get_vendor_vq_stats(vdev, index, msg, info->extack); if (err) return err; return 0; } static int vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { int err; down_read(&vdev->cf_lock); if (!vdev->config->get_vendor_vq_stats) { err = -EOPNOTSUPP; goto out; } err = vdpa_fill_stats_rec(vdev, msg, info, index); out: up_read(&vdev->cf_lock); return err; } static int vdpa_dev_vendor_stats_fill(struct vdpa_device *vdev, struct sk_buff *msg, struct genl_info *info, u32 index) { u32 device_id; void *hdr; int err; u32 portid = info->snd_portid; u32 seq = info->snd_seq; u32 flags = 0; hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_VSTATS_GET); if (!hdr) return -EMSGSIZE; if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { err = -EMSGSIZE; goto undo_msg; } device_id = vdev->config->get_device_id(vdev); if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { err = -EMSGSIZE; goto undo_msg; } switch (device_id) { case VIRTIO_ID_NET: if (index > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { NL_SET_ERR_MSG_MOD(info->extack, "queue index exceeds max value"); err = -ERANGE; break; } err = vendor_stats_fill(vdev, msg, info, index); break; default: err = -EOPNOTSUPP; break; } genlmsg_end(msg, hdr); return err; undo_msg: genlmsg_cancel(msg, hdr); return err; } static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack); if (!err) err = genlmsg_reply(msg, info); mdev_err: put_device(dev); dev_err: up_read(&vdpa_dev_lock); if (err) nlmsg_free(msg); return err; } static int vdpa_dev_net_device_attr_set(struct vdpa_device *vdev, struct genl_info *info) { struct vdpa_dev_set_config set_config = {}; struct vdpa_mgmt_dev *mdev = vdev->mdev; struct nlattr **nl_attrs = info->attrs; const u8 *macaddr; int err = -EOPNOTSUPP; down_write(&vdev->cf_lock); if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { set_config.mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR); macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); if (is_valid_ether_addr(macaddr)) { ether_addr_copy(set_config.net.mac, macaddr); if (mdev->ops->dev_set_attr) { err = mdev->ops->dev_set_attr(mdev, vdev, &set_config); } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Operation not supported by the device."); } } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "Invalid MAC address"); } } up_write(&vdev->cf_lock); return err; } static int vdpa_nl_cmd_dev_attr_set_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct device *dev; const char *name; u64 classes; int err = 0; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); down_write(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, name, 
vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } classes = vdpa_mgmtdev_get_classes(vdev->mdev, NULL); if (classes & BIT_ULL(VIRTIO_ID_NET)) { err = vdpa_dev_net_device_attr_set(vdev, info); } else { NL_SET_ERR_MSG_FMT_MOD(info->extack, "%s device not supported", name); } mdev_err: put_device(dev); dev_err: up_write(&vdpa_dev_lock); return err; } static int vdpa_dev_config_dump(struct device *dev, void *data) { struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); struct vdpa_dev_dump_info *info = data; int err; if (!vdev->mdev) return 0; if (info->idx < info->start_idx) { info->idx++; return 0; } err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack); if (err) return err; info->idx++; return 0; } static int vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { struct vdpa_dev_dump_info info; info.msg = msg; info.cb = cb; info.start_idx = cb->args[0]; info.idx = 0; down_read(&vdpa_dev_lock); bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); up_read(&vdpa_dev_lock); cb->args[0] = info.idx; return msg->len; } static int vdpa_nl_cmd_dev_stats_get_doit(struct sk_buff *skb, struct genl_info *info) { struct vdpa_device *vdev; struct sk_buff *msg; const char *devname; struct device *dev; u32 index; int err; if (!info->attrs[VDPA_ATTR_DEV_NAME]) return -EINVAL; if (!info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]) return -EINVAL; devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; index = nla_get_u32(info->attrs[VDPA_ATTR_DEV_QUEUE_INDEX]); down_read(&vdpa_dev_lock); dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); if (!dev) { NL_SET_ERR_MSG_MOD(info->extack, "device not found"); err = -ENODEV; goto dev_err; } vdev = container_of(dev, struct vdpa_device, dev); if (!vdev->mdev) { NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); err = -EINVAL; goto mdev_err; } err = vdpa_dev_vendor_stats_fill(vdev, msg, info, index); if (err) goto mdev_err; err = genlmsg_reply(msg, info); put_device(dev); up_read(&vdpa_dev_lock); return err; mdev_err: put_device(dev); dev_err: nlmsg_free(msg); up_read(&vdpa_dev_lock); return err; } static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 }, /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 }, [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 }, }; static const struct genl_ops vdpa_nl_ops[] = { { .cmd = VDPA_CMD_MGMTDEV_GET, .doit = vdpa_nl_cmd_mgmtdev_get_doit, .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_NEW, .doit = vdpa_nl_cmd_dev_add_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_DEL, .doit = vdpa_nl_cmd_dev_del_set_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_GET, .doit = vdpa_nl_cmd_dev_get_doit, .dumpit = vdpa_nl_cmd_dev_get_dumpit, }, { .cmd = VDPA_CMD_DEV_CONFIG_GET, .doit 
= vdpa_nl_cmd_dev_config_get_doit, .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, }, { .cmd = VDPA_CMD_DEV_VSTATS_GET, .doit = vdpa_nl_cmd_dev_stats_get_doit, .flags = GENL_ADMIN_PERM, }, { .cmd = VDPA_CMD_DEV_ATTR_SET, .doit = vdpa_nl_cmd_dev_attr_set_doit, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family vdpa_nl_family __ro_after_init = { .name = VDPA_GENL_NAME, .version = VDPA_GENL_VERSION, .maxattr = VDPA_ATTR_MAX, .policy = vdpa_nl_policy, .netnsok = false, .module = THIS_MODULE, .ops = vdpa_nl_ops, .n_ops = ARRAY_SIZE(vdpa_nl_ops), .resv_start_op = VDPA_CMD_DEV_VSTATS_GET + 1, }; static int vdpa_init(void) { int err; err = bus_register(&vdpa_bus); if (err) return err; err = genl_register_family(&vdpa_nl_family); if (err) goto err; return 0; err: bus_unregister(&vdpa_bus); return err; } static void __exit vdpa_exit(void) { genl_unregister_family(&vdpa_nl_family); bus_unregister(&vdpa_bus); ida_destroy(&vdpa_index_ida); } core_initcall(vdpa_init); module_exit(vdpa_exit); MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); MODULE_DESCRIPTION("vDPA bus"); MODULE_LICENSE("GPL v2"); |
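/*
 * Usage sketch (not part of this file): the minimal shape of a vDPA bus
 * driver that binds to devices registered on the bus above. The driver name
 * and callbacks are hypothetical; registration would go through the
 * vdpa_register_driver()/module_vdpa_driver() wrappers declared in
 * <linux/vdpa.h>, which end up in __vdpa_register_driver() above.
 */
static int example_vdpa_probe(struct vdpa_device *vdev)
{
	/* vdpa_set_status() (exported above) serializes on vdev->cf_lock */
	vdpa_set_status(vdev, 0);
	return 0;
}

static void example_vdpa_remove(struct vdpa_device *vdev)
{
	vdpa_set_status(vdev, 0);
}

static struct vdpa_driver example_vdpa_driver = {
	.driver = {
		.name = "example_vdpa",
	},
	.probe = example_vdpa_probe,
	.remove = example_vdpa_remove,
};
/* module_vdpa_driver(example_vdpa_driver); would register it on the vdpa bus */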
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/export.h> #include <linux/uaccess.h> #include <linux/mm.h> #include <linux/bitops.h> #include <asm/word-at-a-time.h> /* * Do a strnlen, return length of string *with* final '\0'. * 'count' is the user-supplied count, while 'max' is the * address space maximum. * * Return 0 for exceptions (which includes hitting the address * space maximum), or 'count+1' if hitting the user-supplied * maximum count. * * NOTE! We can sometimes overshoot the user-supplied maximum * if it fits in an aligned 'long'. The caller needs to check * the return value against "> max". */ static __always_inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; unsigned long align, res = 0; unsigned long c; /* * Do everything aligned. But that means that we * need to also expand the maximum.. */ align = (sizeof(unsigned long) - 1) & (unsigned long)src; src -= align; max += align; unsafe_get_user(c, (unsigned long __user *)src, efault); c |= aligned_byte_mask(align); for (;;) { unsigned long data; if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); return res + find_zero(data) + 1 - align; } res += sizeof(unsigned long); /* We already handled 'unsigned long' bytes. Did we do it all ? */ if (unlikely(max <= sizeof(unsigned long))) break; max -= sizeof(unsigned long); unsafe_get_user(c, (unsigned long __user *)(src+res), efault); } res -= align; /* * Uhhuh. We hit 'max'. But was that the user-specified maximum * too? If so, return the marker for "too long". */ if (res >= count) return count+1; /* * Nope: we hit the address space limit, and we still had more * characters the caller would have wanted. That's 0. */ efault: return 0; } /** * strnlen_user: - Get the size of a user string INCLUDING final NUL. * @str: The string to measure. * @count: Maximum count (including NUL character) * * Context: User context only. This function may sleep if pagefaults are * enabled. * * Get the size of a NUL-terminated string in user space. * * Returns the size of the string INCLUDING the terminating NUL. * If the string is too long, returns a number larger than @count. User * has to check the return value against "> count". * On exception (or invalid count), returns 0. * * NOTE! You should basically never use this function. There is * almost never any valid case for using the length of a user space * string, since the string can be changed at any time by other * threads. Use "strncpy_from_user()" instead to get a stable copy * of the string.
*/ long strnlen_user(const char __user *str, long count) { unsigned long max_addr, src_addr; if (unlikely(count <= 0)) return 0; if (can_do_masked_user_access()) { long retval; str = masked_user_access_begin(str); retval = do_strnlen_user(str, count, count); user_read_access_end(); return retval; } max_addr = TASK_SIZE_MAX; src_addr = (unsigned long)untagged_addr(str); if (likely(src_addr < max_addr)) { unsigned long max = max_addr - src_addr; long retval; /* * Truncate 'max' to the user-specified limit, so that * we only have one limit we need to check in the loop */ if (max > count) max = count; if (user_read_access_begin(str, max)) { retval = do_strnlen_user(str, count, max); user_read_access_end(); return retval; } } return 0; } EXPORT_SYMBOL(strnlen_user); |
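/*
 * Usage sketch (not part of this file): the check pattern the comment above
 * asks for. The bound and the wrapper name are hypothetical; note that a
 * too-long string yields a value larger than the limit and a fault yields 0,
 * so both cases must be handled before the length is trusted.
 */
static long example_user_string_len(const char __user *ustr)
{
	const long limit = 256;	/* hypothetical maximum, including the NUL */
	long len = strnlen_user(ustr, limit);

	if (len == 0)
		return -EFAULT;	/* fault or invalid count */
	if (len > limit)
		return -E2BIG;	/* no NUL within the limit */
	return len;		/* includes the terminating NUL */
}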
// SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * seq_oss_writeq.c - write queue and sync * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_writeq.h" #include "seq_oss_event.h" #include "seq_oss_timer.h" #include <sound/seq_oss_legacy.h> #include "../seq_lock.h" #include "../seq_clientmgr.h" #include <linux/wait.h> #include <linux/slab.h> #include <linux/sched/signal.h> /* * create a write queue record */ struct seq_oss_writeq * snd_seq_oss_writeq_new(struct seq_oss_devinfo *dp, int maxlen) { struct seq_oss_writeq *q; struct snd_seq_client_pool pool; q = kzalloc(sizeof(*q), GFP_KERNEL); if (!q) return NULL; q->dp = dp; q->maxlen = maxlen; spin_lock_init(&q->sync_lock); q->sync_event_put = 0; q->sync_time = 0; init_waitqueue_head(&q->sync_sleep); memset(&pool, 0, sizeof(pool)); pool.client = dp->cseq; pool.output_pool = maxlen; pool.output_room = maxlen / 2; snd_seq_oss_control(dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); return q; } /* * delete the write queue */ void snd_seq_oss_writeq_delete(struct seq_oss_writeq *q) { if (q) { snd_seq_oss_writeq_clear(q); /* to be sure */ kfree(q); } } /* * reset the write queue */ void snd_seq_oss_writeq_clear(struct seq_oss_writeq *q) { struct snd_seq_remove_events reset; memset(&reset, 0, sizeof(reset)); reset.remove_mode = SNDRV_SEQ_REMOVE_OUTPUT; /* remove all */ snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_REMOVE_EVENTS, &reset); /* wake up sleepers if any */ snd_seq_oss_writeq_wakeup(q, 0); } /* * wait until the write buffer has enough room */ int snd_seq_oss_writeq_sync(struct seq_oss_writeq *q) { struct seq_oss_devinfo *dp = q->dp; abstime_t time; time = snd_seq_oss_timer_cur_tick(dp->timer); if (q->sync_time >= time) return 0; /* already finished */ if (! q->sync_event_put) { struct snd_seq_event ev; union evrec *rec; /* put echoback event */ memset(&ev, 0, sizeof(ev)); ev.flags = 0; ev.type = SNDRV_SEQ_EVENT_ECHO; ev.time.tick = time; /* echo back to itself */ snd_seq_oss_fill_addr(dp, &ev, dp->addr.client, dp->addr.port); rec = (union evrec *)&ev.data; rec->t.code = SEQ_SYNCTIMER; rec->t.time = time; q->sync_event_put = 1; snd_seq_kernel_client_enqueue(dp->cseq, &ev, NULL, true); } wait_event_interruptible_timeout(q->sync_sleep, ! q->sync_event_put, HZ); if (signal_pending(current)) /* interrupted - return 0 to finish sync */ q->sync_event_put = 0; if (!
q->sync_event_put || q->sync_time >= time) return 0; return 1; } /* * wake up sync - echo event was caught */ void snd_seq_oss_writeq_wakeup(struct seq_oss_writeq *q, abstime_t time) { unsigned long flags; spin_lock_irqsave(&q->sync_lock, flags); q->sync_time = time; q->sync_event_put = 0; wake_up(&q->sync_sleep); spin_unlock_irqrestore(&q->sync_lock, flags); } /* * return the unused pool size */ int snd_seq_oss_writeq_get_free_size(struct seq_oss_writeq *q) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); return pool.output_free; } /* * set output threshold size from ioctl */ void snd_seq_oss_writeq_set_output(struct seq_oss_writeq *q, int val) { struct snd_seq_client_pool pool; pool.client = q->dp->cseq; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, &pool); pool.output_room = val; snd_seq_oss_control(q->dp, SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, &pool); }
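/*
 * Usage sketch (not part of this file): draining the output queue with the
 * helpers above, in the spirit of the OSS sync handling. The wrapper name is
 * hypothetical. snd_seq_oss_writeq_sync() returns 0 once the sync point has
 * been reached (or the wait was interrupted) and 1 if it should be called
 * again.
 */
static void example_drain_output(struct seq_oss_writeq *q)
{
	while (snd_seq_oss_writeq_sync(q))
		;
}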
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET Generic infrastructure for Network protocols. * * Definitions for request_sock * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * From code originally in include/net/tcp.h */ #ifndef _REQUEST_SOCK_H #define _REQUEST_SOCK_H #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> #include <linux/refcount.h> #include <net/sock.h> #include <net/rstreason.h> struct request_sock; struct sk_buff; struct dst_entry; struct proto; struct request_sock_ops { int family; unsigned int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(const struct sock *sk, struct request_sock *req); void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); void (*syn_ack_timeout)(const struct request_sock *req); }; int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); struct saved_syn { u32 mac_hdrlen; u32 network_hdrlen; u32 tcp_hdrlen; u8 data[]; }; /* struct request_sock - mini sock to represent a connection request */ struct request_sock { struct sock_common __req_common; #define rsk_refcnt __req_common.skc_refcnt #define rsk_hash __req_common.skc_hash #define rsk_listener __req_common.skc_listener #define rsk_window_clamp __req_common.skc_window_clamp #define rsk_rcv_wnd __req_common.skc_rcv_wnd struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ u8 syncookie:1; /* True if * 1) tcpopts needs to be encoded in * TS of SYN+ACK * 2) ACK is validated by BPF kfunc.
*/ u8 num_timeout:7; /* number of timeouts */ u32 ts_recent; struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; struct saved_syn *saved_syn; u32 secid; u32 peer_secid; u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) { return (struct request_sock *)sk; } static inline struct sock *req_to_sk(struct request_sock *req) { return (struct sock *)req; } /** * skb_steal_sock - steal a socket from an sk_buff * @skb: sk_buff to steal the socket from * @refcounted: is set to true if the socket is reference-counted * @prefetched: is set to true if the socket was assigned from bpf */ static inline struct sock *skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) { struct sock *sk = skb->sk; if (!sk) { *prefetched = false; *refcounted = false; return NULL; } *prefetched = skb_sk_is_prefetched(skb); if (*prefetched) { #if IS_ENABLED(CONFIG_SYN_COOKIES) if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { struct request_sock *req = inet_reqsk(sk); *refcounted = false; sk = req->rsk_listener; req->rsk_listener = NULL; return sk; } #endif *refcounted = sk_is_refcounted(sk); } else { *refcounted = true; } skb->destructor = NULL; skb->sk = NULL; return sk; } static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } static inline void reqsk_free(struct request_sock *req) { DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); __reqsk_free(req); } static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) __reqsk_free(req); } /* * For a TCP Fast Open listener - * lock - protects the access to all the reqsk, which is co-owned by * the listener and the child socket. * qlen - pending TFO requests (still in TCP_SYN_RECV). * max_qlen - max TFO reqs allowed before TFO is disabled. * * XXX (TFO) - ideally these fields can be made as part of "listen_sock" * structure above. But there is some implementation difficulty due to * listen_sock being part of request_sock_queue hence will be freed when * a listener is stopped. But TFO related fields may continue to be * accessed even after a listener is closed, until its sk_refcnt drops * to 0 implying no more outstanding TFO reqs. One solution is to keep * listen_opt around until sk_refcnt drops to 0. But there is some other * complexity that needs to be resolved. E.g., a listener can be disabled * temporarily through shutdown()->tcp_disconnect(), and re-enabled later. */ struct fastopen_queue { struct request_sock *rskq_rst_head; /* Keep track of past TFO */ struct request_sock *rskq_rst_tail; /* requests that caused RST. * This is part of the defense * against spoofing attack. 
*/ spinlock_t lock; int qlen; /* # of pending (TCP_SYN_RECV) reqs */ int max_qlen; /* != 0 iff TFO is currently enabled */ struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ }; /** struct request_sock_queue - queue of request_socks * * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children * @rskq_defer_accept - User waits for some data after accept() * */ struct request_sock_queue { spinlock_t rskq_lock; u8 rskq_defer_accept; u32 synflood_warned; atomic_t qlen; atomic_t young; struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine * if TFO is enabled. */ }; void reqsk_queue_alloc(struct request_sock_queue *queue); void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset); static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, struct sock *parent) { struct request_sock *req; spin_lock_bh(&queue->rskq_lock); req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } spin_unlock_bh(&queue->rskq_lock); return req; } static inline void reqsk_queue_removed(struct request_sock_queue *queue, const struct request_sock *req) { if (req->num_timeout == 0) atomic_dec(&queue->young); atomic_dec(&queue->qlen); } static inline void reqsk_queue_added(struct request_sock_queue *queue) { atomic_inc(&queue->young); atomic_inc(&queue->qlen); } static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return atomic_read(&queue->qlen); } static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return atomic_read(&queue->young); } /* RFC 7323 2.3 Using the Window Scale Option * The window field (SEG.WND) of every outgoing segment, with the * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits. * * This means the SEG.WND carried in SYNACK can not exceed 65535. * We use this property to harden TCP stack while in NEW_SYN_RECV state. */ static inline u32 tcp_synack_window(const struct request_sock *req) { return min(req->rsk_rcv_wnd, 65535U); } #endif /* _REQUEST_SOCK_H */ |
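The RFC 7323 note above tcp_synack_window() is easy to check with a small standalone sketch: the window field of a SYN+ACK is never scaled, so the advertised value is clamped at 65535, while every later segment carries the receive window right-shifted by the negotiated scale. The snippet below is illustrative userspace code only, not part of the header; the 256 KiB window and the shift of 7 are example values chosen here.

/* Illustrative sketch of the tcp_synack_window() clamp (not kernel code). */
#include <stdio.h>
#include <stdint.h>

static unsigned int synack_window(uint32_t rcv_wnd)
{
        /* Mirrors min(req->rsk_rcv_wnd, 65535U): no scaling on SYN+ACK. */
        return rcv_wnd < 65535 ? rcv_wnd : 65535;
}

static unsigned int scaled_window(uint32_t rcv_wnd, unsigned int rcv_wscale)
{
        /* RFC 7323 2.3: every other outgoing segment is right-shifted. */
        uint32_t wnd = rcv_wnd >> rcv_wscale;

        return wnd < 65535 ? wnd : 65535;
}

int main(void)
{
        uint32_t rcv_wnd = 262144;      /* example 256 KiB receive window */
        unsigned int wscale = 7;        /* example negotiated shift */

        printf("SYN+ACK advertises %u (unscaled, clamped)\n",
               synack_window(rcv_wnd));
        printf("later ACKs advertise %u (seen as %u by the peer)\n",
               scaled_window(rcv_wnd, wscale),
               scaled_window(rcv_wnd, wscale) << wscale);
        return 0;
}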
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * drivers/media/radio/si470x/radio-si470x-common.c
 *
 * Driver for radios with Silicon Labs Si470x FM Radio Receivers
 *
 * Copyright (c) 2009 Tobias Lorenz <tobias.lorenz@gmx.net>
 * Copyright (c) 2012 Hans de Goede <hdegoede@redhat.com>
 */

/*
 * History:
 * 2008-01-12	Tobias Lorenz <tobias.lorenz@gmx.net>
 *		Version 1.0.0
 *		- First working version
 * 2008-01-13	Tobias Lorenz <tobias.lorenz@gmx.net>
 *		Version 1.0.1
 *		-
Improved error handling, every function now returns errno * - Improved multi user access (start/mute/stop) * - Channel doesn't get lost anymore after start/mute/stop * - RDS support added (polling mode via interrupt EP 1) * - marked default module parameters with *value* * - switched from bit structs to bit masks * - header file cleaned and integrated * 2008-01-14 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.2 * - hex values are now lower case * - commented USB ID for ADS/Tech moved on todo list * - blacklisted si470x in hid-quirks.c * - rds buffer handling functions integrated into *_work, *_read * - rds_command in si470x_poll exchanged against simple retval * - check for firmware version 15 * - code order and prototypes still remain the same * - spacing and bottom of band codes remain the same * 2008-01-16 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.3 * - code reordered to avoid function prototypes * - switch/case defaults are now more user-friendly * - unified comment style * - applied all checkpatch.pl v1.12 suggestions * except the warning about the too long lines with bit comments * - renamed FMRADIO to RADIO to cut line length (checkpatch.pl) * 2008-01-22 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.4 * - avoid poss. locking when doing copy_to_user which may sleep * - RDS is automatically activated on read now * - code cleaned of unnecessary rds_commands * - USB Vendor/Product ID for ADS/Tech FM Radio Receiver verified * (thanks to Guillaume RAMOUSSE) * 2008-01-27 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.5 * - number of seek_retries changed to tune_timeout * - fixed problem with incomplete tune operations by own buffers * - optimization of variables and printf types * - improved error logging * 2008-01-31 Tobias Lorenz <tobias.lorenz@gmx.net> * Oliver Neukum <oliver@neukum.org> * Version 1.0.6 * - fixed coverity checker warnings in *_usb_driver_disconnect * - probe()/open() race by correct ordering in probe() * - DMA coherency rules by separate allocation of all buffers * - use of endianness macros * - abuse of spinlock, replaced by mutex * - racy handling of timer in disconnect, * replaced by delayed_work * - racy interruptible_sleep_on(), * replaced with wait_event_interruptible() * - handle signals in read() * 2008-02-08 Tobias Lorenz <tobias.lorenz@gmx.net> * Oliver Neukum <oliver@neukum.org> * Version 1.0.7 * - usb autosuspend support * - unplugging fixed * 2008-05-07 Tobias Lorenz <tobias.lorenz@gmx.net> * Version 1.0.8 * - hardware frequency seek support * - afc indication * - more safety checks, let si470x_get_freq return errno * - vidioc behavior corrected according to v4l2 spec * 2008-10-20 Alexey Klimov <klimov.linux@gmail.com> * - add support for KWorld USB FM Radio FM700 * - blacklisted KWorld radio in hid-core.c and hid-ids.h * 2008-12-03 Mark Lord <mlord@pobox.com> * - add support for DealExtreme USB Radio * 2009-01-31 Bob Ross <pigiron@gmx.com> * - correction of stereo detection/setting * - correction of signal strength indicator scaling * 2009-01-31 Rick Bronson <rick@efn.org> * Tobias Lorenz <tobias.lorenz@gmx.net> * - add LED status output * - get HW/SW version from scratchpad * 2009-06-16 Edouard Lafargue <edouard@lafargue.name> * Version 1.0.10 * - add support for interrupt mode for RDS endpoint, * instead of polling. 
* Improves RDS reception significantly */ /* kernel includes */ #include "radio-si470x.h" /************************************************************************** * Module Parameters **************************************************************************/ /* Spacing (kHz) */ /* 0: 200 kHz (USA, Australia) */ /* 1: 100 kHz (Europe, Japan) */ /* 2: 50 kHz */ static unsigned short space = 2; module_param(space, ushort, 0444); MODULE_PARM_DESC(space, "Spacing: 0=200kHz 1=100kHz *2=50kHz*"); /* De-emphasis */ /* 0: 75 us (USA) */ /* 1: 50 us (Europe, Australia, Japan) */ static unsigned short de = 1; module_param(de, ushort, 0444); MODULE_PARM_DESC(de, "De-emphasis: 0=75us *1=50us*"); /* Tune timeout */ static unsigned int tune_timeout = 3000; module_param(tune_timeout, uint, 0644); MODULE_PARM_DESC(tune_timeout, "Tune timeout: *3000*"); /* Seek timeout */ static unsigned int seek_timeout = 5000; module_param(seek_timeout, uint, 0644); MODULE_PARM_DESC(seek_timeout, "Seek timeout: *5000*"); static const struct v4l2_frequency_band bands[] = { { .type = V4L2_TUNER_RADIO, .index = 0, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 87500 * 16, .rangehigh = 108000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, { .type = V4L2_TUNER_RADIO, .index = 1, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 76000 * 16, .rangehigh = 108000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, { .type = V4L2_TUNER_RADIO, .index = 2, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_FREQ_BANDS | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP, .rangelow = 76000 * 16, .rangehigh = 90000 * 16, .modulation = V4L2_BAND_MODULATION_FM, }, }; /************************************************************************** * Generic Functions **************************************************************************/ /* * si470x_set_band - set the band */ static int si470x_set_band(struct si470x_device *radio, int band) { if (radio->band == band) return 0; radio->band = band; radio->registers[SYSCONFIG2] &= ~SYSCONFIG2_BAND; radio->registers[SYSCONFIG2] |= radio->band << 6; return radio->set_register(radio, SYSCONFIG2); } /* * si470x_set_chan - set the channel */ static int si470x_set_chan(struct si470x_device *radio, unsigned short chan) { int retval; unsigned long time_left; bool timed_out = false; retval = radio->get_register(radio, POWERCFG); if (retval) return retval; if ((radio->registers[POWERCFG] & (POWERCFG_ENABLE|POWERCFG_DMUTE)) != (POWERCFG_ENABLE|POWERCFG_DMUTE)) { return 0; } /* start tuning */ radio->registers[CHANNEL] &= ~CHANNEL_CHAN; radio->registers[CHANNEL] |= CHANNEL_TUNE | chan; retval = radio->set_register(radio, CHANNEL); if (retval < 0) goto done; /* wait till tune operation has completed */ reinit_completion(&radio->completion); time_left = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(tune_timeout)); if (time_left == 0) timed_out = true; if ((radio->registers[STATUSRSSI] & STATUSRSSI_STC) == 0) dev_warn(&radio->videodev.dev, "tune does not complete\n"); if (timed_out) dev_warn(&radio->videodev.dev, "tune timed out after %u ms\n", tune_timeout); /* stop tuning */ 
radio->registers[CHANNEL] &= ~CHANNEL_TUNE; retval = radio->set_register(radio, CHANNEL); done: return retval; } /* * si470x_get_step - get channel spacing */ static unsigned int si470x_get_step(struct si470x_device *radio) { /* Spacing (kHz) */ switch ((radio->registers[SYSCONFIG2] & SYSCONFIG2_SPACE) >> 4) { /* 0: 200 kHz (USA, Australia) */ case 0: return 200 * 16; /* 1: 100 kHz (Europe, Japan) */ case 1: return 100 * 16; /* 2: 50 kHz */ default: return 50 * 16; } } /* * si470x_get_freq - get the frequency */ static int si470x_get_freq(struct si470x_device *radio, unsigned int *freq) { int chan, retval; /* read channel */ retval = radio->get_register(radio, READCHAN); chan = radio->registers[READCHAN] & READCHAN_READCHAN; /* Frequency (MHz) = Spacing (kHz) x Channel + Bottom of Band (MHz) */ *freq = chan * si470x_get_step(radio) + bands[radio->band].rangelow; return retval; } /* * si470x_set_freq - set the frequency */ int si470x_set_freq(struct si470x_device *radio, unsigned int freq) { unsigned short chan; freq = clamp(freq, bands[radio->band].rangelow, bands[radio->band].rangehigh); /* Chan = [ Freq (Mhz) - Bottom of Band (MHz) ] / Spacing (kHz) */ chan = (freq - bands[radio->band].rangelow) / si470x_get_step(radio); return si470x_set_chan(radio, chan); } EXPORT_SYMBOL_GPL(si470x_set_freq); /* * si470x_set_seek - set seek */ static int si470x_set_seek(struct si470x_device *radio, const struct v4l2_hw_freq_seek *seek) { int band, retval; unsigned int freq; bool timed_out = false; unsigned long time_left; /* set band */ if (seek->rangelow || seek->rangehigh) { for (band = 0; band < ARRAY_SIZE(bands); band++) { if (bands[band].rangelow == seek->rangelow && bands[band].rangehigh == seek->rangehigh) break; } if (band == ARRAY_SIZE(bands)) return -EINVAL; /* No matching band found */ } else band = 1; /* If nothing is specified seek 76 - 108 Mhz */ if (radio->band != band) { retval = si470x_get_freq(radio, &freq); if (retval) return retval; retval = si470x_set_band(radio, band); if (retval) return retval; retval = si470x_set_freq(radio, freq); if (retval) return retval; } /* start seeking */ radio->registers[POWERCFG] |= POWERCFG_SEEK; if (seek->wrap_around) radio->registers[POWERCFG] &= ~POWERCFG_SKMODE; else radio->registers[POWERCFG] |= POWERCFG_SKMODE; if (seek->seek_upward) radio->registers[POWERCFG] |= POWERCFG_SEEKUP; else radio->registers[POWERCFG] &= ~POWERCFG_SEEKUP; retval = radio->set_register(radio, POWERCFG); if (retval < 0) return retval; /* wait till tune operation has completed */ reinit_completion(&radio->completion); time_left = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(seek_timeout)); if (time_left == 0) timed_out = true; if ((radio->registers[STATUSRSSI] & STATUSRSSI_STC) == 0) dev_warn(&radio->videodev.dev, "seek does not complete\n"); if (radio->registers[STATUSRSSI] & STATUSRSSI_SF) dev_warn(&radio->videodev.dev, "seek failed / band limit reached\n"); /* stop seeking */ radio->registers[POWERCFG] &= ~POWERCFG_SEEK; retval = radio->set_register(radio, POWERCFG); /* try again, if timed out */ if (retval == 0 && timed_out) return -ENODATA; return retval; } /* * si470x_start - switch on radio */ int si470x_start(struct si470x_device *radio) { int retval; /* powercfg */ radio->registers[POWERCFG] = POWERCFG_DMUTE | POWERCFG_ENABLE | POWERCFG_RDSM; retval = radio->set_register(radio, POWERCFG); if (retval < 0) goto done; /* sysconfig 1 */ radio->registers[SYSCONFIG1] |= SYSCONFIG1_RDSIEN | SYSCONFIG1_STCIEN | SYSCONFIG1_RDS; 
radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_GPIO2; radio->registers[SYSCONFIG1] |= SYSCONFIG1_GPIO2_INT; if (de) radio->registers[SYSCONFIG1] |= SYSCONFIG1_DE; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) goto done; /* sysconfig 2 */ radio->registers[SYSCONFIG2] = (0x1f << 8) | /* SEEKTH */ ((radio->band << 6) & SYSCONFIG2_BAND) |/* BAND */ ((space << 4) & SYSCONFIG2_SPACE) | /* SPACE */ 15; /* VOLUME (max) */ retval = radio->set_register(radio, SYSCONFIG2); if (retval < 0) goto done; /* reset last channel */ retval = si470x_set_chan(radio, radio->registers[CHANNEL] & CHANNEL_CHAN); done: return retval; } EXPORT_SYMBOL_GPL(si470x_start); /* * si470x_stop - switch off radio */ int si470x_stop(struct si470x_device *radio) { int retval; /* sysconfig 1 */ radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_RDS; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) goto done; /* powercfg */ radio->registers[POWERCFG] &= ~POWERCFG_DMUTE; /* POWERCFG_ENABLE has to automatically go low */ radio->registers[POWERCFG] |= POWERCFG_ENABLE | POWERCFG_DISABLE; retval = radio->set_register(radio, POWERCFG); done: return retval; } EXPORT_SYMBOL_GPL(si470x_stop); /* * si470x_rds_on - switch on rds reception */ static int si470x_rds_on(struct si470x_device *radio) { int retval; /* sysconfig 1 */ radio->registers[SYSCONFIG1] |= SYSCONFIG1_RDS; retval = radio->set_register(radio, SYSCONFIG1); if (retval < 0) radio->registers[SYSCONFIG1] &= ~SYSCONFIG1_RDS; return retval; } /************************************************************************** * File Operations Interface **************************************************************************/ /* * si470x_fops_read - read RDS data */ static ssize_t si470x_fops_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct si470x_device *radio = video_drvdata(file); int retval = 0; unsigned int block_count = 0; /* switch on rds reception */ if ((radio->registers[SYSCONFIG1] & SYSCONFIG1_RDS) == 0) si470x_rds_on(radio); /* block if no new data available */ while (radio->wr_index == radio->rd_index) { if (file->f_flags & O_NONBLOCK) { retval = -EWOULDBLOCK; goto done; } if (wait_event_interruptible(radio->read_queue, radio->wr_index != radio->rd_index) < 0) { retval = -EINTR; goto done; } } /* calculate block count from byte count */ count /= 3; /* copy RDS block out of internal buffer and to user buffer */ while (block_count < count) { if (radio->rd_index == radio->wr_index) break; /* always transfer rds complete blocks */ if (copy_to_user(buf, &radio->buffer[radio->rd_index], 3)) /* retval = -EFAULT; */ break; /* increment and wrap read pointer */ radio->rd_index += 3; if (radio->rd_index >= radio->buf_size) radio->rd_index = 0; /* increment counters */ block_count++; buf += 3; retval += 3; } done: return retval; } /* * si470x_fops_poll - poll RDS data */ static __poll_t si470x_fops_poll(struct file *file, struct poll_table_struct *pts) { struct si470x_device *radio = video_drvdata(file); __poll_t req_events = poll_requested_events(pts); __poll_t retval = v4l2_ctrl_poll(file, pts); if (req_events & (EPOLLIN | EPOLLRDNORM)) { /* switch on rds reception */ if ((radio->registers[SYSCONFIG1] & SYSCONFIG1_RDS) == 0) si470x_rds_on(radio); poll_wait(file, &radio->read_queue, pts); if (radio->rd_index != radio->wr_index) retval |= EPOLLIN | EPOLLRDNORM; } return retval; } static int si470x_fops_open(struct file *file) { struct si470x_device *radio = video_drvdata(file); return radio->fops_open(file); } /* * 
si470x_fops_release - file release */ static int si470x_fops_release(struct file *file) { struct si470x_device *radio = video_drvdata(file); return radio->fops_release(file); } /* * si470x_fops - file operations interface */ static const struct v4l2_file_operations si470x_fops = { .owner = THIS_MODULE, .read = si470x_fops_read, .poll = si470x_fops_poll, .unlocked_ioctl = video_ioctl2, .open = si470x_fops_open, .release = si470x_fops_release, }; /************************************************************************** * Video4Linux Interface **************************************************************************/ static int si470x_s_ctrl(struct v4l2_ctrl *ctrl) { struct si470x_device *radio = container_of(ctrl->handler, struct si470x_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_VOLUME: radio->registers[SYSCONFIG2] &= ~SYSCONFIG2_VOLUME; radio->registers[SYSCONFIG2] |= ctrl->val; return radio->set_register(radio, SYSCONFIG2); case V4L2_CID_AUDIO_MUTE: if (ctrl->val) radio->registers[POWERCFG] &= ~POWERCFG_DMUTE; else radio->registers[POWERCFG] |= POWERCFG_DMUTE; return radio->set_register(radio, POWERCFG); default: return -EINVAL; } } /* * si470x_vidioc_g_tuner - get tuner attributes */ static int si470x_vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *tuner) { struct si470x_device *radio = video_drvdata(file); int retval = 0; if (tuner->index != 0) return -EINVAL; if (!radio->status_rssi_auto_update) { retval = radio->get_register(radio, STATUSRSSI); if (retval < 0) return retval; } /* driver constants */ strscpy(tuner->name, "FM", sizeof(tuner->name)); tuner->type = V4L2_TUNER_RADIO; tuner->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_RDS | V4L2_TUNER_CAP_RDS_BLOCK_IO | V4L2_TUNER_CAP_HWSEEK_BOUNDED | V4L2_TUNER_CAP_HWSEEK_WRAP; tuner->rangelow = 76 * FREQ_MUL; tuner->rangehigh = 108 * FREQ_MUL; /* stereo indicator == stereo (instead of mono) */ if ((radio->registers[STATUSRSSI] & STATUSRSSI_ST) == 0) tuner->rxsubchans = V4L2_TUNER_SUB_MONO; else tuner->rxsubchans = V4L2_TUNER_SUB_STEREO; /* If there is a reliable method of detecting an RDS channel, then this code should check for that before setting this RDS subchannel. */ tuner->rxsubchans |= V4L2_TUNER_SUB_RDS; /* mono/stereo selector */ if ((radio->registers[POWERCFG] & POWERCFG_MONO) == 0) tuner->audmode = V4L2_TUNER_MODE_STEREO; else tuner->audmode = V4L2_TUNER_MODE_MONO; /* min is worst, max is best; signal:0..0xffff; rssi: 0..0xff */ /* measured in units of dbµV in 1 db increments (max at ~75 dbµV) */ tuner->signal = (radio->registers[STATUSRSSI] & STATUSRSSI_RSSI); /* the ideal factor is 0xffff/75 = 873,8 */ tuner->signal = (tuner->signal * 873) + (8 * tuner->signal / 10); if (tuner->signal > 0xffff) tuner->signal = 0xffff; /* automatic frequency control: -1: freq to low, 1 freq to high */ /* AFCRL does only indicate that freq. differs, not if too low/high */ tuner->afc = (radio->registers[STATUSRSSI] & STATUSRSSI_AFCRL) ? 
1 : 0; return retval; } /* * si470x_vidioc_s_tuner - set tuner attributes */ static int si470x_vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *tuner) { struct si470x_device *radio = video_drvdata(file); if (tuner->index != 0) return -EINVAL; /* mono/stereo selector */ switch (tuner->audmode) { case V4L2_TUNER_MODE_MONO: radio->registers[POWERCFG] |= POWERCFG_MONO; /* force mono */ break; case V4L2_TUNER_MODE_STEREO: default: radio->registers[POWERCFG] &= ~POWERCFG_MONO; /* try stereo */ break; } return radio->set_register(radio, POWERCFG); } /* * si470x_vidioc_g_frequency - get tuner or modulator radio frequency */ static int si470x_vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *freq) { struct si470x_device *radio = video_drvdata(file); if (freq->tuner != 0) return -EINVAL; freq->type = V4L2_TUNER_RADIO; return si470x_get_freq(radio, &freq->frequency); } /* * si470x_vidioc_s_frequency - set tuner or modulator radio frequency */ static int si470x_vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *freq) { struct si470x_device *radio = video_drvdata(file); int retval; if (freq->tuner != 0) return -EINVAL; if (freq->frequency < bands[radio->band].rangelow || freq->frequency > bands[radio->band].rangehigh) { /* Switch to band 1 which covers everything we support */ retval = si470x_set_band(radio, 1); if (retval) return retval; } return si470x_set_freq(radio, freq->frequency); } /* * si470x_vidioc_s_hw_freq_seek - set hardware frequency seek */ static int si470x_vidioc_s_hw_freq_seek(struct file *file, void *priv, const struct v4l2_hw_freq_seek *seek) { struct si470x_device *radio = video_drvdata(file); if (seek->tuner != 0) return -EINVAL; if (file->f_flags & O_NONBLOCK) return -EWOULDBLOCK; return si470x_set_seek(radio, seek); } /* * si470x_vidioc_enum_freq_bands - enumerate supported bands */ static int si470x_vidioc_enum_freq_bands(struct file *file, void *priv, struct v4l2_frequency_band *band) { if (band->tuner != 0) return -EINVAL; if (band->index >= ARRAY_SIZE(bands)) return -EINVAL; *band = bands[band->index]; return 0; } const struct v4l2_ctrl_ops si470x_ctrl_ops = { .s_ctrl = si470x_s_ctrl, }; EXPORT_SYMBOL_GPL(si470x_ctrl_ops); static int si470x_vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *capability) { struct si470x_device *radio = video_drvdata(file); return radio->vidioc_querycap(file, priv, capability); }; /* * si470x_ioctl_ops - video device ioctl operations */ static const struct v4l2_ioctl_ops si470x_ioctl_ops = { .vidioc_querycap = si470x_vidioc_querycap, .vidioc_g_tuner = si470x_vidioc_g_tuner, .vidioc_s_tuner = si470x_vidioc_s_tuner, .vidioc_g_frequency = si470x_vidioc_g_frequency, .vidioc_s_frequency = si470x_vidioc_s_frequency, .vidioc_s_hw_freq_seek = si470x_vidioc_s_hw_freq_seek, .vidioc_enum_freq_bands = si470x_vidioc_enum_freq_bands, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; /* * si470x_viddev_template - video device interface */ const struct video_device si470x_viddev_template = { .fops = &si470x_fops, .name = DRIVER_NAME, .release = video_device_release_empty, .ioctl_ops = &si470x_ioctl_ops, }; EXPORT_SYMBOL_GPL(si470x_viddev_template); MODULE_DESCRIPTION("Core radio driver for Si470x FM Radio Receivers"); MODULE_LICENSE("GPL"); |
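As a quick illustration of the conversion comments inside si470x_get_freq() and si470x_set_freq() above: the V4L2 "low" tuner unit is 62.5 Hz, i.e. 16 units per kHz, which is why the band limits and the channel step are multiplied by 16. The sketch below reworks that arithmetic in standalone userspace code; band 0 (87.5-108 MHz) and a 100 kHz spacing are assumed purely for the example, and none of this is driver code.

/* Illustrative sketch of the si470x channel <-> frequency math (not driver code). */
#include <stdio.h>

#define BAND0_RANGELOW  (87500u * 16)   /* 87.5 MHz in 62.5 Hz units */
#define SPACE_100KHZ    (100u * 16)     /* 100 kHz spacing, same units */

static unsigned int chan_to_freq(unsigned int chan)
{
        /* Frequency = Spacing * Channel + Bottom of Band */
        return chan * SPACE_100KHZ + BAND0_RANGELOW;
}

static unsigned int freq_to_chan(unsigned int freq)
{
        /* Chan = (Freq - Bottom of Band) / Spacing */
        return (freq - BAND0_RANGELOW) / SPACE_100KHZ;
}

int main(void)
{
        unsigned int freq = 99800u * 16;        /* 99.8 MHz */
        unsigned int chan = freq_to_chan(freq);

        printf("99.8 MHz -> channel %u -> %.1f MHz\n",
               chan, chan_to_freq(chan) / 16.0 / 1000.0);
        return 0;
}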
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/seq_file.h>
#include <net/ip.h>
#include <net/mptcp.h>
#include <net/snmp.h>
#include <net/net_namespace.h>

#include "mib.h"

static const struct snmp_mib mptcp_snmp_list[] = {
        SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE),
        SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE),
        SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK),
        SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
        SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
        SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
        SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
        SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
        SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT),
        SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
        SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
        SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
        SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
        SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX),
        SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
        SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX),
        SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
        SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
        SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
        SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
        SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
        SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
        SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR),
        SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
        SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK),
        SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET),
        SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
        SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
        SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
        SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
        SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
        SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
        SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
        SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
        SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
        SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
        SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX),
        SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP),
        SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
        SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX),
        SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP),
        SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
        SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
        SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
        SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX),
        SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX),
        SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX),
        SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
        SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX), SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX), SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX), SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX), SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), SNMP_MIB_SENTINEL }; /* mptcp_mib_alloc - allocate percpu mib counters * * These are allocated when the first mptcp socket is created so * we do not waste percpu memory if mptcp isn't in use. */ bool mptcp_mib_alloc(struct net *net) { struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib); if (!mib) return false; if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib)) free_percpu(mib); return true; } void mptcp_seq_show(struct seq_file *seq) { unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1]; struct net *net = seq->private; int i; seq_puts(seq, "MPTcpExt:"); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %s", mptcp_snmp_list[i].name); seq_puts(seq, "\nMPTcpExt:"); memset(sum, 0, sizeof(sum)); if (net->mib.mptcp_statistics) snmp_get_cpu_field_batch(sum, mptcp_snmp_list, net->mib.mptcp_statistics); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %lu", sum[i]); seq_putc(seq, '\n'); } |
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  fs/eventpoll.c (Efficient event retrieval implementation)
 *  Copyright (C) 2001,...,2009	 Davide Libenzi
 *
 *  Davide Libenzi <davidel@xmailserver.org>
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
#include <linux/rbtree.h>
#include <linux/wait.h>
#include <linux/eventpoll.h>
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/mutex.h>
#include <linux/anon_inodes.h>
#include <linux/device.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/mman.h>
#include <linux/atomic.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
#include <linux/rculist.h>
#include <linux/capability.h>
#include <net/busy_poll.h>

/*
 * LOCKING:
 * There are three levels of locking required by epoll:
 *
 * 1) epnested_mutex (mutex)
 * 2) ep->mtx (mutex)
 * 3) ep->lock (rwlock)
 *
* The acquire order is the one listed above, from 1 to 3. * We need a rwlock (ep->lock) because we manipulate objects * from inside the poll callback, that might be triggered from * a wake_up() that in turn might be called from IRQ context. * So we can't sleep inside the poll callback and hence we need * a spinlock. During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a * mutex (ep->mtx). It is acquired during the event transfer loop, * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). * The epnested_mutex is acquired when inserting an epoll fd onto another * epoll fd. We do this so that we walk the epoll tree and ensure that this * insertion does not create a cycle of epoll file descriptors, which * could lead to deadlock. We need a global mutex to prevent two * simultaneous inserts (A into B and B into A) from racing and * constructing a cycle without either insert observing that it is * going to. * It is necessary to acquire multiple "ep->mtx"es at once in the * case when one epoll fd is added to another. In this case, we * always acquire the locks in the order of nesting (i.e. after * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired * before e2->mtx). Since we disallow cycles of epoll file * descriptors, this ensures that the mutexes are well-ordered. In * order to communicate this nesting to lockdep, when walking a tree * of epoll file descriptors, we use the current recursion depth as * the lockdep subkey. * It is possible to drop the "ep->mtx" and to use the global * mutex "epnested_mutex" (together with "ep->lock") to have it working, * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epnested_mutex" are very rare, while for * normal operations the epoll private "ep->mtx" will guarantee * a better scalability. */ /* Epoll private bits inside the event mask */ #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) #define EPOLLINOUT_BITS (EPOLLIN | EPOLLOUT) #define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | EPOLLERR | EPOLLHUP | \ EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) #define EP_UNACTIVE_PTR ((void *) -1L) #define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) struct epoll_filefd { struct file *file; int fd; } __packed; /* Wait structure used by the poll hooks */ struct eppoll_entry { /* List header used to link this structure to the "struct epitem" */ struct eppoll_entry *next; /* The "base" pointer is set to the container "struct epitem" */ struct epitem *base; /* * Wait queue item that will be linked to the target file wait * queue head. */ wait_queue_entry_t wait; /* The wait queue head that linked the "wait" wait queue item */ wait_queue_head_t *whead; }; /* * Each file descriptor added to the eventpoll interface will * have an entry of this type linked to the "rbr" RB tree. * Avoid increasing the size of this struct, there can be many thousands * of these on a server and we do not want this to take another cache line. 
*/ struct epitem { union { /* RB tree node links this structure to the eventpoll RB tree */ struct rb_node rbn; /* Used to free the struct epitem */ struct rcu_head rcu; }; /* List header used to link this structure to the eventpoll ready list */ struct list_head rdllink; /* * Works together "struct eventpoll"->ovflist in keeping the * single linked chain of items. */ struct epitem *next; /* The file descriptor information this item refers to */ struct epoll_filefd ffd; /* * Protected by file->f_lock, true for to-be-released epitem already * removed from the "struct file" items list; together with * eventpoll->refcount orchestrates "struct eventpoll" disposal */ bool dying; /* List containing poll wait queues */ struct eppoll_entry *pwqlist; /* The "container" of this item */ struct eventpoll *ep; /* List header used to link this item to the "struct file" items list */ struct hlist_node fllink; /* wakeup_source used when EPOLLWAKEUP is set */ struct wakeup_source __rcu *ws; /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; /* * This structure is stored inside the "private_data" member of the file * structure and represents the main data structure for the eventpoll * interface. */ struct eventpoll { /* * This mutex is used to ensure that files are not removed * while epoll is using them. This is held during the event * collection loop, the file cleanup path, the epoll file exit * code and the ctl operations. */ struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; /* Wait queue used by file->poll() */ wait_queue_head_t poll_wait; /* List of ready file descriptors */ struct list_head rdllist; /* Lock which protects rdllist and ovflist */ rwlock_t lock; /* RB tree root used to store monitored fd structs */ struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that * happened while transferring ready events to userspace w/out * holding ->lock. */ struct epitem *ovflist; /* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */ struct wakeup_source *ws; /* The user that created the eventpoll descriptor */ struct user_struct *user; struct file *file; /* used to optimize loop detection check */ u64 gen; struct hlist_head refs; /* * usage count, used together with epitem->dying to * orchestrate the disposal of this struct */ refcount_t refcount; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ unsigned int napi_id; /* busy poll timeout */ u32 busy_poll_usecs; /* busy poll packet budget */ u16 busy_poll_budget; bool prefer_busy_poll; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC /* tracks wakeup nests for lockdep validation */ u8 nests; #endif }; /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; struct epitem *epi; }; /* * Configuration options available inside /proc/sys/fs/epoll/ */ /* Maximum number of epoll watched descriptors, per user */ static long max_user_watches __read_mostly; /* Used for cycles detection */ static DEFINE_MUTEX(epnested_mutex); static u64 loop_check_gen = 0; /* Used to check for epoll file descriptor inclusion loops */ static struct eventpoll *inserting_into; /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __ro_after_init; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __ro_after_init; /* * List of files with newly added links, where we may need to limit the number * of emanating paths. 
Protected by the epnested_mutex. */ struct epitems_head { struct hlist_head epitems; struct epitems_head *next; }; static struct epitems_head *tfile_check_list = EP_UNACTIVE_PTR; static struct kmem_cache *ephead_cache __ro_after_init; static inline void free_ephead(struct epitems_head *head) { if (head) kmem_cache_free(ephead_cache, head); } static void list_file(struct file *file) { struct epitems_head *head; head = container_of(file->f_ep, struct epitems_head, epitems); if (!head->next) { head->next = tfile_check_list; tfile_check_list = head; } } static void unlist_file(struct epitems_head *head) { struct epitems_head *to_free = head; struct hlist_node *p = rcu_dereference(hlist_first_rcu(&head->epitems)); if (p) { struct epitem *epi= container_of(p, struct epitem, fllink); spin_lock(&epi->ffd.file->f_lock); if (!hlist_empty(&head->epitems)) to_free = NULL; head->next = NULL; spin_unlock(&epi->ffd.file->f_lock); } free_ephead(to_free); } #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static long long_zero; static long long_max = LONG_MAX; static const struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, .maxlen = sizeof(max_user_watches), .mode = 0644, .proc_handler = proc_doulongvec_minmax, .extra1 = &long_zero, .extra2 = &long_max, }, }; static void __init epoll_sysctls_init(void) { register_sysctl("fs/epoll", epoll_table); } #else #define epoll_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static const struct file_operations eventpoll_fops; static inline int is_file_epoll(struct file *f) { return f->f_op == &eventpoll_fops; } /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { ffd->file = file; ffd->fd = fd; } /* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { return (p1->file > p2->file ? +1: (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct epitem *epi) { return !list_empty(&epi->rdllink); } static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait); } /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /** * ep_events_available - Checks if ready events might be available. * * @ep: Pointer to the eventpoll context. * * Return: a value different than %zero if ready events are available, * or %zero otherwise. */ static inline int ep_events_available(struct eventpoll *ep) { return !list_empty_careful(&ep->rdllist) || READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL /** * busy_loop_ep_timeout - check if busy poll has timed out. The timeout value * from the epoll instance ep is preferred, but if it is not set fallback to * the system-wide global via busy_loop_timeout. * * @start_time: The start time used to compute the remaining time until timeout. * @ep: Pointer to the eventpoll context. * * Return: true if the timeout has expired, false otherwise. 
*/ static bool busy_loop_ep_timeout(unsigned long start_time, struct eventpoll *ep) { unsigned long bp_usec = READ_ONCE(ep->busy_poll_usecs); if (bp_usec) { unsigned long end_time = start_time + bp_usec; unsigned long now = busy_loop_current_time(); return time_after(now, end_time); } else { return busy_loop_timeout(start_time); } } static bool ep_busy_loop_on(struct eventpoll *ep) { return !!READ_ONCE(ep->busy_poll_usecs) || READ_ONCE(ep->prefer_busy_poll) || net_busy_loop_on(); } static bool ep_busy_loop_end(void *p, unsigned long start_time) { struct eventpoll *ep = p; return ep_events_available(ep) || busy_loop_ep_timeout(start_time, ep); } /* * Busy poll if globally on and supporting sockets found && no events, * busy loop will return if need_resched or ep_events_available. * * we must do our busy polling with irqs enabled */ static bool ep_busy_loop(struct eventpoll *ep, int nonblock) { unsigned int napi_id = READ_ONCE(ep->napi_id); u16 budget = READ_ONCE(ep->busy_poll_budget); bool prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (!budget) budget = BUSY_POLL_BUDGET; if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, prefer_busy_poll, budget); if (ep_events_available(ep)) return true; /* * Busy poll timed out. Drop NAPI ID for now, we can add * it back in when we have moved a socket with a valid NAPI * ID onto the ready list. */ if (prefer_busy_poll) napi_resume_irqs(napi_id); ep->napi_id = 0; return false; } return false; } /* * Set epoll busy poll NAPI ID from sk. */ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { struct eventpoll *ep = epi->ep; unsigned int napi_id; struct socket *sock; struct sock *sk; if (!ep_busy_loop_on(ep)) return; sock = sock_from_file(epi->ffd.file); if (!sock) return; sk = sock->sk; if (!sk) return; napi_id = READ_ONCE(sk->sk_napi_id); /* Non-NAPI IDs can be rejected * or * Nothing to do if we already have this ID */ if (napi_id < MIN_NAPI_ID || napi_id == ep->napi_id) return; /* record NAPI ID for use in next busy poll */ ep->napi_id = napi_id; } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct eventpoll *ep = file->private_data; void __user *uarg = (void __user *)arg; struct epoll_params epoll_params; switch (cmd) { case EPIOCSPARAMS: if (copy_from_user(&epoll_params, uarg, sizeof(epoll_params))) return -EFAULT; /* pad byte must be zero */ if (epoll_params.__pad) return -EINVAL; if (epoll_params.busy_poll_usecs > S32_MAX) return -EINVAL; if (epoll_params.prefer_busy_poll > 1) return -EINVAL; if (epoll_params.busy_poll_budget > NAPI_POLL_WEIGHT && !capable(CAP_NET_ADMIN)) return -EPERM; WRITE_ONCE(ep->busy_poll_usecs, epoll_params.busy_poll_usecs); WRITE_ONCE(ep->busy_poll_budget, epoll_params.busy_poll_budget); WRITE_ONCE(ep->prefer_busy_poll, epoll_params.prefer_busy_poll); return 0; case EPIOCGPARAMS: memset(&epoll_params, 0, sizeof(epoll_params)); epoll_params.busy_poll_usecs = READ_ONCE(ep->busy_poll_usecs); epoll_params.busy_poll_budget = READ_ONCE(ep->busy_poll_budget); epoll_params.prefer_busy_poll = READ_ONCE(ep->prefer_busy_poll); if (copy_to_user(uarg, &epoll_params, sizeof(epoll_params))) return -EFAULT; return 0; default: return -ENOIOCTLCMD; } } static void ep_suspend_napi_irqs(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll)) napi_suspend_irqs(napi_id); } static void ep_resume_napi_irqs(struct eventpoll *ep) { unsigned 
int napi_id = READ_ONCE(ep->napi_id); if (napi_id >= MIN_NAPI_ID && READ_ONCE(ep->prefer_busy_poll)) napi_resume_irqs(napi_id); } #else static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) { return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) { } static long ep_eventpoll_bp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } static void ep_suspend_napi_irqs(struct eventpoll *ep) { } static void ep_resume_napi_irqs(struct eventpoll *ep) { } #endif /* CONFIG_NET_RX_BUSY_POLL */ /* * As described in commit 0ccf831cb lockdep: annotate epoll * the use of wait queues used by epoll is done in a very controlled * manner. Wake ups can nest inside each other, but are never done * with the same locking. For example: * * dfd = socket(...); * efd1 = epoll_create(); * efd2 = epoll_create(); * epoll_ctl(efd1, EPOLL_CTL_ADD, dfd, ...); * epoll_ctl(efd2, EPOLL_CTL_ADD, efd1, ...); * * When a packet arrives to the device underneath "dfd", the net code will * issue a wake_up() on its poll wake list. Epoll (efd1) has installed a * callback wakeup entry on that queue, and the wake_up() performed by the * "dfd" net code will end up in ep_poll_callback(). At this point epoll * (efd1) notices that it may have some event ready, so it needs to wake up * the waiters on its poll wait list (efd2). So it calls ep_poll_safewake() * that ends up in another wake_up(), after having checked about the * recursion constraints. That are, no more than EP_MAX_NESTS, to avoid * stack blasting. * * When CONFIG_DEBUG_LOCK_ALLOC is enabled, make sure lockdep can handle * this special case of epoll. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, unsigned pollflags) { struct eventpoll *ep_src; unsigned long flags; u8 nests = 0; /* * To set the subclass or nesting level for spin_lock_irqsave_nested() * it might be natural to create a per-cpu nest count. However, since * we can recurse on ep->poll_wait.lock, and a non-raw spinlock can * schedule() in the -rt kernel, the per-cpu variable are no longer * protected. Thus, we are introducing a per eventpoll nest field. * If we are not being call from ep_poll_callback(), epi is NULL and * we are at the first level of nesting, 0. Otherwise, we are being * called from ep_poll_callback() and if a previous wakeup source is * not an epoll file itself, we are at depth 1 since the wakeup source * is depth 0. If the wakeup source is a previous epoll file in the * wakeup chain then we use its nests value and record ours as * nests + 1. The previous epoll file nests value is stable since its * already holding its own poll_wait.lock. */ if (epi) { if ((is_file_epoll(epi->ffd.file))) { ep_src = epi->ffd.file->private_data; nests = ep_src->nests; } else { nests = 1; } } spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests); ep->nests = nests + 1; wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags); ep->nests = 0; spin_unlock_irqrestore(&ep->poll_wait.lock, flags); } #else static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, __poll_t pollflags) { wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } #endif static void ep_remove_wait_queue(struct eppoll_entry *pwq) { wait_queue_head_t *whead; rcu_read_lock(); /* * If it is cleared by POLLFREE, it should be rcu-safe. * If we read NULL we need a barrier paired with * smp_store_release() in ep_poll_callback(), otherwise * we rely on whead->lock. 
*/ whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); } /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held. */ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) { struct eppoll_entry **p = &epi->pwqlist; struct eppoll_entry *pwq; while ((pwq = *p) != NULL) { *p = pwq->next; ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } /* call only when ep->mtx is held */ static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi) { return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx)); } /* call only when ep->mtx is held */ static inline void ep_pm_stay_awake(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); if (ws) __pm_stay_awake(ws); } static inline bool ep_has_wakeup_source(struct epitem *epi) { return rcu_access_pointer(epi->ws) ? true : false; } /* call when ep->mtx cannot be held (ep_poll_callback) */ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) { struct wakeup_source *ws; rcu_read_lock(); ws = rcu_dereference(epi->ws); if (ws) __pm_stay_awake(ws); rcu_read_unlock(); } /* * ep->mutex needs to be held because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist) { /* * Steal the ready list, and re-init the original one to the * empty list. Also, set ep->ovflist to NULL so that events * happening while looping w/out locks, are not lost. We cannot * have the poll callback to queue directly on ep->rdllist, * because we want the "sproc" callback to be able to do it * in a lockless way. */ lockdep_assert_irqs_enabled(); write_lock_irq(&ep->lock); list_splice_init(&ep->rdllist, txlist); WRITE_ONCE(ep->ovflist, NULL); write_unlock_irq(&ep->lock); } static void ep_done_scan(struct eventpoll *ep, struct list_head *txlist) { struct epitem *epi, *nepi; write_lock_irq(&ep->lock); /* * During the time we spent inside the "sproc" callback, some * other events might have been queued by the poll callback. * We re-insert them inside the main ready-list here. */ for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { /* * We need to check if the item is already in the list. * During the "sproc" callback execution time, items are * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ if (!ep_is_linked(epi)) { /* * ->ovflist is LIFO, so we have to reverse it in order * to keep in FIFO. */ list_add(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after * releasing the lock, events will be queued in the normal way inside * ep->rdllist. */ WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); /* * Quickly re-inject items left on "txlist". 
*/ list_splice(txlist, &ep->rdllist); __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); } write_unlock_irq(&ep->lock); } static void ep_get(struct eventpoll *ep) { refcount_inc(&ep->refcount); } /* * Returns true if the event poll can be disposed */ static bool ep_refcount_dec_and_test(struct eventpoll *ep) { if (!refcount_dec_and_test(&ep->refcount)) return false; WARN_ON_ONCE(!RB_EMPTY_ROOT(&ep->rbr.rb_root)); return true; } static void ep_free(struct eventpoll *ep) { ep_resume_napi_irqs(ep); mutex_destroy(&ep->mtx); free_uid(ep->user); wakeup_source_unregister(ep->ws); kfree(ep); } /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. * If the dying flag is set, do the removal only if force is true. * This prevents ep_clear_and_put() from dropping all the ep references * while running concurrently with eventpoll_release_file(). * Returns true if the eventpoll can be disposed. */ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) { struct file *file = epi->ffd.file; struct epitems_head *to_free; struct hlist_head *head; lockdep_assert_irqs_enabled(); /* * Removes poll wait queue hooks. */ ep_unregister_pollwait(ep, epi); /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); if (epi->dying && !force) { spin_unlock(&file->f_lock); return false; } to_free = NULL; head = file->f_ep; if (head->first == &epi->fllink && !epi->fllink.next) { /* See eventpoll_release() for details. */ WRITE_ONCE(file->f_ep, NULL); if (!is_file_epoll(file)) { struct epitems_head *v; v = container_of(head, struct epitems_head, epitems); if (!smp_load_acquire(&v->next)) to_free = v; } } hlist_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); free_ephead(to_free); rb_erase_cached(&epi->rbn, &ep->rbr); write_lock_irq(&ep->lock); if (ep_is_linked(epi)) list_del_init(&epi->rdllink); write_unlock_irq(&ep->lock); wakeup_source_unregister(ep_wakeup_source(epi)); /* * At this point it is safe to free the eventpoll item. Use the union * field epi->rcu, since we are trying to minimize the size of * 'struct epitem'. The 'rbn' field is no longer in use. Protected by * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make * use of the rbn field. */ kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); return ep_refcount_dec_and_test(ep); } /* * ep_remove variant for callers owing an additional reference to the ep */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { WARN_ON_ONCE(__ep_remove(ep, epi, false)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) ep_poll_safewake(ep, NULL, 0); mutex_lock(&ep->mtx); /* * Walks through the whole tree by unregistering poll callbacks. */ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); cond_resched(); } /* * Walks through the whole tree and try to free each "struct epitem". * Note that ep_remove_safe() will not remove the epitem in case of a * racing eventpoll_release_file(); the latter will do the removal. * At this point we are sure no poll callbacks will be lingering around. * Since we still own a reference to the eventpoll struct, the loop can't * dispose it. 
*/ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) { next = rb_next(rbp); epi = rb_entry(rbp, struct epitem, rbn); ep_remove_safe(ep, epi); cond_resched(); } dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); if (dispose) ep_free(ep); } static long ep_eventpoll_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret; if (!is_file_epoll(file)) return -EINVAL; switch (cmd) { case EPIOCSPARAMS: case EPIOCGPARAMS: ret = ep_eventpoll_bp_ioctl(file, cmd, arg); break; default: ret = -EINVAL; break; } return ret; } static int ep_eventpoll_release(struct inode *inode, struct file *file) { struct eventpoll *ep = file->private_data; if (ep) ep_clear_and_put(ep); return 0; } static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth); static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth) { struct eventpoll *ep = file->private_data; LIST_HEAD(txlist); struct epitem *epi, *tmp; poll_table pt; __poll_t res = 0; init_poll_funcptr(&pt, NULL); /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); /* * Proceed to find out if wanted events are really available inside * the ready list. */ mutex_lock_nested(&ep->mtx, depth); ep_start_scan(ep, &txlist); list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { if (ep_item_poll(epi, &pt, depth + 1)) { res = EPOLLIN | EPOLLRDNORM; break; } else { /* * Item has been dropped into the ready list by the poll * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ __pm_relax(ep_wakeup_source(epi)); list_del_init(&epi->rdllink); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } /* * The ffd.file pointer may be in the process of being torn down due to * being closed, but we may not have finished eventpoll_release() yet. * * Normally, even with the atomic_long_inc_not_zero, the file may have * been free'd and then gotten re-allocated to something else (since * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). * * But for epoll, users hold the ep->mtx mutex, and as such any file in * the process of being free'd will block in eventpoll_release_file() * and thus the underlying file allocation will not be free'd, and the * file re-use cannot happen. * * For the same reason we can avoid a rcu_read_lock() around the * operation - 'ffd.file' cannot go away even if the refcount has * reached zero (but we must still not call out to ->poll() functions * etc). */ static struct file *epi_fget(const struct epitem *epi) { struct file *file; file = epi->ffd.file; if (!file_ref_get(&file->f_ref)) file = NULL; return file; } /* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() * is correctly annotated. */ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { struct file *file = epi_fget(epi); __poll_t res; /* * We could return EPOLLERR | EPOLLHUP or something, but let's * treat this more as "file doesn't exist, poll didn't happen". 
*/ if (!file) return 0; pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); fput(file); return res & epi->event.events; } static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { return __ep_eventpoll_poll(file, wait, 0); } #ifdef CONFIG_PROC_FS static void ep_show_fdinfo(struct seq_file *m, struct file *f) { struct eventpoll *ep = f->private_data; struct rb_node *rbp; mutex_lock(&ep->mtx); for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); struct inode *inode = file_inode(epi->ffd.file); seq_printf(m, "tfd: %8d events: %8x data: %16llx " " pos:%lli ino:%lx sdev:%x\n", epi->ffd.fd, epi->event.events, (long long)epi->event.data, (long long)epi->ffd.file->f_pos, inode->i_ino, inode->i_sb->s_dev); if (seq_has_overflowed(m)) break; } mutex_unlock(&ep->mtx); } #endif /* File callbacks that implement the eventpoll file behaviour */ static const struct file_operations eventpoll_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, .poll = ep_eventpoll_poll, .llseek = noop_llseek, .unlocked_ioctl = ep_eventpoll_ioctl, .compat_ioctl = compat_ptr_ioctl, }; /* * This is called from eventpoll_release() to unlink files from the eventpoll * interface. We need to have this facility to cleanup correctly files that are * closed without being removed from the eventpoll interface. */ void eventpoll_release_file(struct file *file) { struct eventpoll *ep; struct epitem *epi; bool dispose; /* * Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from * touching the epitems list before eventpoll_release_file() can access * the ep->mtx. */ again: spin_lock(&file->f_lock); if (file->f_ep && file->f_ep->first) { epi = hlist_entry(file->f_ep->first, struct epitem, fllink); epi->dying = true; spin_unlock(&file->f_lock); /* * ep access is safe as we still own a reference to the ep * struct */ ep = epi->ep; mutex_lock(&ep->mtx); dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); if (dispose) ep_free(ep); goto again; } spin_unlock(&file->f_lock); } static int ep_alloc(struct eventpoll **pep) { struct eventpoll *ep; ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (unlikely(!ep)) return -ENOMEM; mutex_init(&ep->mtx); rwlock_init(&ep->lock); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = get_current_user(); refcount_set(&ep->refcount, 1); *pep = ep; return 0; } /* * Search the file inside the eventpoll tree. The RB tree operations * are protected by the "mtx" mutex, and ep_find() must be called with * "mtx" held. 
*/ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) rbp = rbp->rb_right; else if (kcmp < 0) rbp = rbp->rb_left; else { epir = epi; break; } } return epir; } #ifdef CONFIG_KCMP static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff) { struct rb_node *rbp; struct epitem *epi; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (epi->ffd.fd == tfd) { if (toff == 0) return epi; else toff--; } cond_resched(); } return NULL; } struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff) { struct file *file_raw; struct eventpoll *ep; struct epitem *epi; if (!is_file_epoll(file)) return ERR_PTR(-EINVAL); ep = file->private_data; mutex_lock(&ep->mtx); epi = ep_find_tfd(ep, tfd, toff); if (epi) file_raw = epi->ffd.file; else file_raw = ERR_PTR(-ENOENT); mutex_unlock(&ep->mtx); return file_raw; } #endif /* CONFIG_KCMP */ /* * Adds a new entry to the tail of the list in a lockless way, i.e. * multiple CPUs are allowed to call this function concurrently. * * Beware: it is necessary to prevent any other modifications of the * existing list until all changes are completed, in other words * concurrent list_add_tail_lockless() calls should be protected * with a read lock, where write lock acts as a barrier which * makes sure all list_add_tail_lockless() calls are fully * completed. * * Also an element can be locklessly added to the list only in one * direction i.e. either to the tail or to the head, otherwise * concurrent access will corrupt the list. * * Return: %false if element has been already added to the list, %true * otherwise. */ static inline bool list_add_tail_lockless(struct list_head *new, struct list_head *head) { struct list_head *prev; /* * This is simple 'new->next = head' operation, but cmpxchg() * is used in order to detect that same element has been just * added to the list from another CPU: the winner observes * new->next == new. */ if (!try_cmpxchg(&new->next, &new, head)) return false; /* * Initially ->next of a new element must be updated with the head * (we are inserting to the tail) and only then pointers are atomically * exchanged. XCHG guarantees memory ordering, thus ->next should be * updated before pointers are actually swapped and pointers are * swapped before prev->next is updated. */ prev = xchg(&head->prev, new); /* * It is safe to modify prev->next and new->prev, because a new element * is added only to the tail and new->next is updated before XCHG. */ prev->next = new; new->prev = prev; return true; } /* * Chains a new epi entry to the tail of the ep->ovflist in a lockless way, * i.e. multiple CPUs are allowed to call this function concurrently. * * Return: %false if epi element has been already chained, %true otherwise. 
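 *
 * The push itself is the classic lockless LIFO insert into ->ovflist;
 * spelled out for illustration, the xchg() in the body below is equivalent
 * to
 *
 *	old_head = xchg(&ep->ovflist, epi);	(atomically become the new head)
 *	epi->next = old_head;			(link to the previous head)
 *
 * which is why ep_done_scan() has to reverse the list when moving the
 * entries back to ->rdllist, in order to restore FIFO order.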
*/ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; /* Fast preliminary check */ if (epi->next != EP_UNACTIVE_PTR) return false; /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; /* Atomically exchange tail */ epi->next = xchg(&ep->ovflist, epi); return true; } /* * This is the callback that is passed to the wait queue wakeup * mechanism. It is called by the stored file descriptors when they * have events to report. * * This callback takes a read lock in order not to contend with concurrent * events from another file descriptor, thus all modifications to ->rdllist * or ->ovflist are lockless. Read lock is paired with the write lock from * ep_start/done_scan(), which stops all list modifications and guarantees * that lists state is seen correctly. * * Another thing worth to mention is that ep_poll_callback() can be called * concurrently for the same @epi from different CPUs if poll table was inited * with several wait queues entries. Plural wakeup from different CPUs of a * single wait queue is serialized by wq.lock, but the case when multiple wait * queues are used should be detected accordingly. This is detected using * cmpxchg() operation. */ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { int pwake = 0; struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; __poll_t pollflags = key_to_poll(key); unsigned long flags; int ewake = 0; read_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); /* * If the event mask does not contain any poll(2) event, we consider the * descriptor to be disabled. This condition is likely the effect of the * EPOLLONESHOT bit that disables the descriptor when an event is received, * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) goto out_unlock; /* * Check the events coming with the callback. At this stage, not * every device reports the events in the "key" parameter of the * callback. We need to be able to handle both cases here, hence the * test for "key" != NULL before the event match test. */ if (pollflags && !(pollflags & epi->event.events)) goto out_unlock; /* * If we are transferring events to userspace, we can hold no locks * (because we're accessing user memory, and because of linux f_op->poll() * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { if (chain_epi_lockless(epi)) ep_pm_stay_awake_rcu(epi); } else if (!ep_is_linked(epi)) { /* In the usual case, add event to ready list. */ if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() * wait list. 
*/ if (waitqueue_active(&ep->wq)) { if ((epi->event.events & EPOLLEXCLUSIVE) && !(pollflags & POLLFREE)) { switch (pollflags & EPOLLINOUT_BITS) { case EPOLLIN: if (epi->event.events & EPOLLIN) ewake = 1; break; case EPOLLOUT: if (epi->event.events & EPOLLOUT) ewake = 1; break; case 0: ewake = 1; break; } } if (sync) wake_up_sync(&ep->wq); else wake_up(&ep->wq); } if (waitqueue_active(&ep->poll_wait)) pwake++; out_unlock: read_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE); if (!(epi->event.events & EPOLLEXCLUSIVE)) ewake = 1; if (pollflags & POLLFREE) { /* * If we race with ep_remove_wait_queue() it can miss * ->whead = NULL and do another remove_wait_queue() after * us, so we can't use __remove_wait_queue(). */ list_del_init(&wait->entry); /* * ->whead != NULL protects us from the race with * ep_clear_and_put() or ep_remove(), ep_remove_wait_queue() * takes whead->lock held by the caller. Once we nullify it, * nothing protects ep/epi or even wait. */ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); } return ewake; } /* * This is the callback that is used to add our wait queue to the * target file wakeup lists. */ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { struct ep_pqueue *epq = container_of(pt, struct ep_pqueue, pt); struct epitem *epi = epq->epi; struct eppoll_entry *pwq; if (unlikely(!epi)) // an earlier allocation has failed return; pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL); if (unlikely(!pwq)) { epq->epi = NULL; return; } init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; if (epi->event.events & EPOLLEXCLUSIVE) add_wait_queue_exclusive(whead, &pwq->wait); else add_wait_queue(whead, &pwq->wait); pwq->next = epi->pwqlist; epi->pwqlist = pwq; } static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; struct epitem *epic; bool leftmost = true; while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); if (kcmp > 0) { p = &parent->rb_right; leftmost = false; } else p = &parent->rb_left; } rb_link_node(&epi->rbn, parent, p); rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); } #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate * from a single file of interest. For example, we allow 1000 paths of length * 1, to emanate from each file of interest. This essentially represents the * potential wakeup paths, which need to be limited in order to avoid massive * uncontrolled wakeup storms. The common use case should be a single ep which * is connected to n file sources. In this case each file source has 1 path * of length 1. Thus, the numbers below should be more than sufficient. These * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify * and delete can't add additional paths. Protected by the epnested_mutex. 
*/ static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 }; static int path_count[PATH_ARR_SIZE]; static int path_count_inc(int nests) { /* Allow an arbitrary number of depth 1 paths */ if (nests == 0) return 0; if (++path_count[nests] > path_limits[nests]) return -1; return 0; } static void path_count_init(void) { int i; for (i = 0; i < PATH_ARR_SIZE; i++) path_count[i] = 0; } static int reverse_path_check_proc(struct hlist_head *refs, int depth) { int error = 0; struct epitem *epi; if (depth > EP_MAX_NESTS) /* too deep nesting */ return -1; /* CTL_DEL can remove links here, but that can't increase our count */ hlist_for_each_entry_rcu(epi, refs, fllink) { struct hlist_head *refs = &epi->ep->refs; if (hlist_empty(refs)) error = path_count_inc(depth); else error = reverse_path_check_proc(refs, depth + 1); if (error != 0) break; } return error; } /** * reverse_path_check - The tfile_check_list is list of epitem_head, which have * links that are proposed to be newly added. We need to * make sure that those added links don't add too many * paths such that we will spend all our time waking up * eventpoll objects. * * Return: %zero if the proposed links don't create too many paths, * %-1 otherwise. */ static int reverse_path_check(void) { struct epitems_head *p; for (p = tfile_check_list; p != EP_UNACTIVE_PTR; p = p->next) { int error; path_count_init(); rcu_read_lock(); error = reverse_path_check_proc(&p->epitems, 0); rcu_read_unlock(); if (error) return error; } return 0; } static int ep_create_wakeup_source(struct epitem *epi) { struct name_snapshot n; struct wakeup_source *ws; if (!epi->ep->ws) { epi->ep->ws = wakeup_source_register(NULL, "eventpoll"); if (!epi->ep->ws) return -ENOMEM; } take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); ws = wakeup_source_register(NULL, n.name.name); release_dentry_name_snapshot(&n); if (!ws) return -ENOMEM; rcu_assign_pointer(epi->ws, ws); return 0; } /* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */ static noinline void ep_destroy_wakeup_source(struct epitem *epi) { struct wakeup_source *ws = ep_wakeup_source(epi); RCU_INIT_POINTER(epi->ws, NULL); /* * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is * used internally by wakeup_source_remove, too (called by * wakeup_source_unregister), so we cannot use call_rcu */ synchronize_rcu(); wakeup_source_unregister(ws); } static int attach_epitem(struct file *file, struct epitem *epi) { struct epitems_head *to_free = NULL; struct hlist_head *head = NULL; struct eventpoll *ep = NULL; if (is_file_epoll(file)) ep = file->private_data; if (ep) { head = &ep->refs; } else if (!READ_ONCE(file->f_ep)) { allocate: to_free = kmem_cache_zalloc(ephead_cache, GFP_KERNEL); if (!to_free) return -ENOMEM; head = &to_free->epitems; } spin_lock(&file->f_lock); if (!file->f_ep) { if (unlikely(!head)) { spin_unlock(&file->f_lock); goto allocate; } /* See eventpoll_release() for details. */ WRITE_ONCE(file->f_ep, head); to_free = NULL; } hlist_add_head_rcu(&epi->fllink, file->f_ep); spin_unlock(&file->f_lock); free_ephead(to_free); return 0; } /* * Must be called with "mtx" held. 
*/ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, struct file *tfile, int fd, int full_check) { int error, pwake = 0; __poll_t revents; struct epitem *epi; struct ep_pqueue epq; struct eventpoll *tep = NULL; if (is_file_epoll(tfile)) tep = tfile->private_data; lockdep_assert_irqs_enabled(); if (unlikely(percpu_counter_compare(&ep->user->epoll_watches, max_user_watches) >= 0)) return -ENOSPC; percpu_counter_inc(&ep->user->epoll_watches); if (!(epi = kmem_cache_zalloc(epi_cache, GFP_KERNEL))) { percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->next = EP_UNACTIVE_PTR; if (tep) mutex_lock_nested(&tep->mtx, 1); /* Add the current item to the list of active epoll hook for this file */ if (unlikely(attach_epitem(tfile, epi) < 0)) { if (tep) mutex_unlock(&tep->mtx); kmem_cache_free(epi_cache, epi); percpu_counter_dec(&ep->user->epoll_watches); return -ENOMEM; } if (full_check && !tep) list_file(tfile); /* * Add the current item to the RB tree. All RB tree operations are * protected by "mtx", and ep_insert() is called with "mtx" held. */ ep_rbtree_insert(ep, epi); if (tep) mutex_unlock(&tep->mtx); /* * ep_remove_safe() calls in the later error paths can't lead to * ep_free() as the ep file itself still holds an ep reference. */ ep_get(ep); /* now check if we've created too many backpaths */ if (unlikely(full_check && reverse_path_check())) { ep_remove_safe(ep, epi); return -EINVAL; } if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) { ep_remove_safe(ep, epi); return error; } } /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has * been increased by the caller of this function. Note that after * this operation completes, the poll callback can start hitting * the new item. */ revents = ep_item_poll(epi, &epq.pt, 1); /* * We have to check if something went wrong during the poll wait queue * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ if (unlikely(!epq.epi)) { ep_remove_safe(ep, epi); return -ENOMEM; } /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); /* If the file is already "ready" we drop it inside the ready list */ if (revents && !ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } /* * Modify the interest event mask by dropping an event if the new mask * has a match in the current file status. Must be called with "mtx" held. */ static int ep_modify(struct eventpoll *ep, struct epitem *epi, const struct epoll_event *event) { int pwake = 0; poll_table pt; lockdep_assert_irqs_enabled(); init_poll_funcptr(&pt, NULL); /* * Set the new event interest mask before calling f_op->poll(); * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. 
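 *
 * This is the path taken by EPOLL_CTL_MOD, which is also how userspace
 * re-arms a descriptor after an EPOLLONESHOT delivery, e.g. (illustration
 * only; "epfd", "fd" and "ev" are hypothetical userspace variables):
 *
 *	ev.events = EPOLLIN | EPOLLONESHOT;
 *	ev.data.fd = fd;
 *	epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);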
*/ epi->event.events = event->events; /* need barrier below */ epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { if (!ep_has_wakeup_source(epi)) ep_create_wakeup_source(epi); } else if (ep_has_wakeup_source(epi)) { ep_destroy_wakeup_source(epi); } /* * The following barrier has two effects: * * 1) Flush epi changes above to other CPUs. This ensures * we do not miss events from ep_poll_callback if an * event occurs immediately after we call f_op->poll(). * We need this because we did not take ep->lock while * changing epi above (but ep_poll_callback does take * ep->lock). * * 2) We also need to ensure we do not miss _past_ events * when calling f_op->poll(). This barrier also * pairs with the barrier in wq_has_sleeper (see * comments for wq_has_sleeper). * * This barrier will now guarantee ep_poll_callback or f_op->poll * (or both) will notice the readiness of an item. */ smp_mb(); /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. * If the item is "hot" and it is not registered inside the ready * list, push it inside. */ if (ep_item_poll(epi, &pt, 1)) { write_lock_irq(&ep->lock); if (!ep_is_linked(epi)) { list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) wake_up(&ep->wq); if (waitqueue_active(&ep->poll_wait)) pwake++; } write_unlock_irq(&ep->lock); } /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(ep, NULL, 0); return 0; } static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, int maxevents) { struct epitem *epi, *tmp; LIST_HEAD(txlist); poll_table pt; int res = 0; /* * Always short-circuit for fatal signals to allow threads to make a * timely exit without the chance of finding more events available and * fetching repeatedly. */ if (fatal_signal_pending(current)) return -EINTR; init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); ep_start_scan(ep, &txlist); /* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop we are holding ep->mtx. */ list_for_each_entry_safe(epi, tmp, &txlist, rdllink) { struct wakeup_source *ws; __poll_t revents; if (res >= maxevents) break; /* * Activate ep->ws before deactivating epi->ws to prevent * triggering auto-suspend here (in case we reactive epi->ws * below). * * This could be rearranged to delay the deactivation of epi->ws * instead, but then epi->ws would temporarily be out of sync * with ep_is_linked(). */ ws = ep_wakeup_source(epi); if (ws) { if (ws->active) __pm_stay_awake(ep->ws); __pm_relax(ws); } list_del_init(&epi->rdllink); /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding ep->mtx, * so no operations coming from userspace can change the item. */ revents = ep_item_poll(epi, &pt, 1); if (!revents) continue; events = epoll_put_uevent(revents, epi->event.data, events); if (!events) { list_add(&epi->rdllink, &txlist); ep_pm_stay_awake(epi); if (!res) res = -EFAULT; break; } res++; if (epi->event.events & EPOLLONESHOT) epi->event.events &= EP_PRIVATE_BITS; else if (!(epi->event.events & EPOLLET)) { /* * If this file has been added with Level * Trigger mode, we need to insert back inside * the ready list, so that the next call to * epoll_wait() will check again the events * availability. At this point, no one can insert * into ep->rdllist besides us. 
The epoll_ctl() * callers are locked out by * ep_send_events() holding "mtx" and the * poll callback will queue them in ep->ovflist. */ list_add_tail(&epi->rdllink, &ep->rdllist); ep_pm_stay_awake(epi); } } ep_done_scan(ep, &txlist); mutex_unlock(&ep->mtx); return res; } static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { struct timespec64 now; if (ms < 0) return NULL; if (!ms) { to->tv_sec = 0; to->tv_nsec = 0; return to; } to->tv_sec = ms / MSEC_PER_SEC; to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); *to = timespec64_add_safe(now, *to); return to; } /* * autoremove_wake_function, but remove even on failure to wake up, because we * know that default_wake_function/ttwu will only fail if the thread is already * woken, and in that case the ep_poll loop will remove the entry anyways, not * try to reuse it. */ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, void *key) { int ret = default_wake_function(wq_entry, mode, sync, key); /* * Pairs with list_empty_careful in ep_poll, and ensures future loop * iterations see the cause of this wakeup. */ list_del_init_careful(&wq_entry->entry); return ret; } /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. * * @ep: Pointer to the eventpoll context. * @events: Pointer to the userspace buffer where the ready events should be * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in * timespec. If the timeout is zero, the function will not block, * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * * Return: the number of ready events which have been fetched, or an * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout) { int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; lockdep_assert_irqs_enabled(); if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { slack = select_estimate_accuracy(timeout); to = &expires; *to = timespec64_to_ktime(*timeout); } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the * caller specified a non blocking operation. */ timed_out = 1; } /* * This call is racy: We may or may not see events that are being added * to the ready list under the lock (e.g., in IRQ callbacks). For cases * with a non-zero timeout, this thread will check the ready list under * lock and will add to the wait queue. For cases with a zero * timeout, the user by definition should not care and will have to * recheck again. */ eavail = ep_events_available(ep); while (1) { if (eavail) { /* * Try to transfer events to user space. In case we get * 0 events and there's still timeout left over, we go * trying again in search of more luck. */ res = ep_send_events(ep, events, maxevents); if (res) { if (res > 0) ep_suspend_napi_irqs(ep); return res; } } if (timed_out) return 0; eavail = ep_busy_loop(ep, timed_out); if (eavail) continue; if (signal_pending(current)) return -EINTR; /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each * wakeup. Why it is important? In case of several waiters * each new wakeup will hit the next waiter, giving it the * chance to harvest new event. 
Otherwise wakeup can be * lost. This is also good performance-wise, because on * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. * * In fact, we now use an even more aggressive function that * unconditionally removes, because we don't reuse the wait * entry between loop iterations. This lets us also avoid the * performance issue if a process is killed, causing all of its * threads to wake up without being removed normally. */ init_wait(&wait); wait.func = ep_autoremove_wake_function; write_lock_irq(&ep->lock); /* * Barrierless variant, waitqueue_active() is called under * the same lock on wakeup ep_poll_callback() side, so it * is safe to avoid an explicit barrier. */ __set_current_state(TASK_INTERRUPTIBLE); /* * Do the final check under the lock. ep_start/done_scan() * plays with two lists (->rdllist and ->ovflist) and there * is always a race when both lists are empty for short * period of time although events are pending, so lock is * important. */ eavail = ep_events_available(ep); if (!eavail) __add_wait_queue_exclusive(&ep->wq, &wait); write_unlock_irq(&ep->lock); if (!eavail) timed_out = !schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* * We were woken up, thus go and try to harvest some events. * If timed out and still on the wait queue, recheck eavail * carefully under lock, below. */ eavail = 1; if (!list_empty_careful(&wait.entry)) { write_lock_irq(&ep->lock); /* * If the thread timed out and is not on the wait queue, * it means that the thread was woken up after its * timeout expired before it could reacquire the lock. * Thus, when wait.entry is empty, it needs to harvest * events. */ if (timed_out) eavail = list_empty(&wait.entry); __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); } } } /** * ep_loop_check_proc - verify that adding an epoll file inside another * epoll structure does not violate the constraints, in * terms of closed loops, or too deep chains (which can * result in excessive stack usage). * * @ep: the &struct eventpoll to be currently checked. * @depth: Current depth of the path being checked. * * Return: %zero if adding the epoll @file inside current epoll * structure @ep does not violate the constraints, or %-1 otherwise. */ static int ep_loop_check_proc(struct eventpoll *ep, int depth) { int error = 0; struct rb_node *rbp; struct epitem *epi; mutex_lock_nested(&ep->mtx, depth + 1); ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { struct eventpoll *ep_tovisit; ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->gen == loop_check_gen) continue; if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS) error = -1; else error = ep_loop_check_proc(ep_tovisit, depth + 1); if (error != 0) break; } else { /* * If we've reached a file that is not associated with * an ep, then we need to check if the newly added * links are going to add too many wakeup paths. We do * this by adding it to the tfile_check_list, if it's * not already there, and calling reverse_path_check() * during ep_insert(). */ list_file(epi->ffd.file); } } mutex_unlock(&ep->mtx); return error; } /** * ep_loop_check - Performs a check to verify that adding an epoll file (@to) * into another epoll file (represented by @ep) does not create * closed loops or too deep chains. * * @ep: Pointer to the epoll we are inserting into. 
* @to: Pointer to the epoll to be inserted. * * Return: %zero if adding the epoll @to inside the epoll @from * does not violate the constraints, or %-1 otherwise. */ static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to) { inserting_into = ep; return ep_loop_check_proc(to, 0); } static void clear_tfile_check_list(void) { rcu_read_lock(); while (tfile_check_list != EP_UNACTIVE_PTR) { struct epitems_head *head = tfile_check_list; tfile_check_list = head->next; unlist_file(head); } rcu_read_unlock(); } /* * Open an eventpoll file descriptor. */ static int do_epoll_create(int flags) { int error, fd; struct eventpoll *ep = NULL; struct file *file; /* Check the EPOLL_* constant for consistency. */ BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); if (flags & ~EPOLL_CLOEXEC) return -EINVAL; /* * Create the internal data structure ("struct eventpoll"). */ error = ep_alloc(&ep); if (error < 0) return error; /* * Creates all the items needed to setup an eventpoll file. That is, * a file structure and a free file descriptor. */ fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC)); if (fd < 0) { error = fd; goto out_free_ep; } file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep, O_RDWR | (flags & O_CLOEXEC)); if (IS_ERR(file)) { error = PTR_ERR(file); goto out_free_fd; } ep->file = file; fd_install(fd, file); return fd; out_free_fd: put_unused_fd(fd); out_free_ep: ep_clear_and_put(ep); return error; } SYSCALL_DEFINE1(epoll_create1, int, flags) { return do_epoll_create(flags); } SYSCALL_DEFINE1(epoll_create, int, size) { if (size <= 0) return -EINVAL; return do_epoll_create(0); } #ifdef CONFIG_PM_SLEEP static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epev->events &= ~EPOLLWAKEUP; } #else static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev) { epev->events &= ~EPOLLWAKEUP; } #endif static inline int epoll_mutex_lock(struct mutex *mutex, int depth, bool nonblock) { if (!nonblock) { mutex_lock_nested(mutex, depth); return 0; } if (mutex_trylock(mutex)) return 0; return -EAGAIN; } int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock) { int error; int full_check = 0; struct eventpoll *ep; struct epitem *epi; struct eventpoll *tep = NULL; CLASS(fd, f)(epfd); if (fd_empty(f)) return -EBADF; /* Get the "struct file *" for the target file */ CLASS(fd, tf)(fd); if (fd_empty(tf)) return -EBADF; /* The target file descriptor must support poll */ if (!file_can_poll(fd_file(tf))) return -EPERM; /* Check if EPOLLWAKEUP is allowed */ if (ep_op_has_event(op)) ep_take_care_of_epollwakeup(epds); /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit * adding an epoll file descriptor inside itself. */ error = -EINVAL; if (fd_file(f) == fd_file(tf) || !is_file_epoll(fd_file(f))) goto error_tgt_fput; /* * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only, * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. * Also, we do not currently supported nested exclusive wakeups. */ if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) { if (op == EPOLL_CTL_MOD) goto error_tgt_fput; if (op == EPOLL_CTL_ADD && (is_file_epoll(fd_file(tf)) || (epds->events & ~EPOLLEXCLUSIVE_OK_BITS))) goto error_tgt_fput; } /* * At this point it is safe to assume that the "private_data" contains * our own data structure. 
*/ ep = fd_file(f)->private_data; /* * When we insert an epoll file descriptor inside another epoll file * descriptor, there is the chance of creating closed loops, which are * better be handled here, than in more critical paths. While we are * checking for loops we also determine the list of files reachable * and hang them on the tfile_check_list, so we can check that we * haven't created too many possible wakeup paths. * * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when * the epoll file descriptor is attaching directly to a wakeup source, * unless the epoll file descriptor is nested. The purpose of taking the * 'epnested_mutex' on add is to prevent complex toplogies such as loops and * deep wakeup paths from forming in parallel through multiple * EPOLL_CTL_ADD operations. */ error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { if (READ_ONCE(fd_file(f)->f_ep) || ep->gen == loop_check_gen || is_file_epoll(fd_file(tf))) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epnested_mutex, 0, nonblock); if (error) goto error_tgt_fput; loop_check_gen++; full_check = 1; if (is_file_epoll(fd_file(tf))) { tep = fd_file(tf)->private_data; error = -ELOOP; if (ep_loop_check(ep, tep) != 0) goto error_tgt_fput; } error = epoll_mutex_lock(&ep->mtx, 0, nonblock); if (error) goto error_tgt_fput; } } /* * Try to lookup the file inside our RB tree. Since we grabbed "mtx" * above, we can be sure to be able to use the item looked up by * ep_find() till we release the mutex. */ epi = ep_find(ep, fd_file(tf), fd); error = -EINVAL; switch (op) { case EPOLL_CTL_ADD: if (!epi) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_insert(ep, epds, fd_file(tf), fd, full_check); } else error = -EEXIST; break; case EPOLL_CTL_DEL: if (epi) { /* * The eventpoll itself is still alive: the refcount * can't go to zero here. */ ep_remove_safe(ep, epi); error = 0; } else { error = -ENOENT; } break; case EPOLL_CTL_MOD: if (epi) { if (!(epi->event.events & EPOLLEXCLUSIVE)) { epds->events |= EPOLLERR | EPOLLHUP; error = ep_modify(ep, epi, epds); } } else error = -ENOENT; break; } mutex_unlock(&ep->mtx); error_tgt_fput: if (full_check) { clear_tfile_check_list(); loop_check_gen++; mutex_unlock(&epnested_mutex); } return error; } /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { struct epoll_event epds; if (ep_op_has_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) return -EFAULT; return do_epoll_ctl(epfd, op, fd, &epds, false); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to) { struct eventpoll *ep; /* The maximum number of event must be greater than zero */ if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ if (!access_ok(events, maxevents * sizeof(struct epoll_event))) return -EFAULT; /* Get the "struct file *" for the eventpoll file */ CLASS(fd, f)(epfd); if (fd_empty(f)) return -EBADF; /* * We have to check that the file structure underneath the fd * the user passed to us _is_ an eventpoll file. 
*/ if (!is_file_epoll(fd_file(f))) return -EINVAL; /* * At this point it is safe to assume that the "private_data" contains * our own data structure. */ ep = fd_file(f)->private_data; /* Time to fish for events ... */ return ep_poll(ep, events, maxevents, to); } SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { struct timespec64 to; return do_epoll_wait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ static int do_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *to, const sigset_t __user *sigmask, size_t sigsetsize) { int error; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ error = set_user_sigmask(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, to); restore_saved_sigmask_unless(error == -EINTR); return error; } SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 to; return do_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #ifdef CONFIG_COMPAT static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, struct timespec64 *timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) { long err; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ err = set_compat_user_sigmask(sigmask, sigsetsize); if (err) return err; err = do_epoll_wait(epfd, events, maxevents, timeout); restore_saved_sigmask_unless(err == -EINTR); return err; } COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 to; return do_compat_epoll_pwait(epfd, events, maxevents, ep_timeout_to_timespec(&to, timeout), sigmask, sigsetsize); } COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, int, maxevents, const struct __kernel_timespec __user *, timeout, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { struct timespec64 ts, *to = NULL; if (timeout) { if (get_timespec64(&ts, timeout)) return -EFAULT; to = &ts; if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } return do_compat_epoll_pwait(epfd, events, maxevents, to, sigmask, sigsetsize); } #endif static int __init eventpoll_init(void) { struct sysinfo si; si_meminfo(&si); /* * Allows top 4% of lomem to be allocated for epoll watches (per user). 
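 *
 * ((totalram - totalhigh) / 25) pages, shifted by PAGE_SHIFT, is 4% of low
 * memory in bytes; dividing by EP_ITEM_COST turns that into a number of
 * watches. As a purely illustrative figure, assuming 8 GiB of low memory
 * and an EP_ITEM_COST of roughly 200 bytes, this works out to about
 * 8 GiB * 0.04 / 200 B, i.e. on the order of 1.7 million watches per user.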
	 */
	max_user_watches = (((si.totalram - si.totalhigh) / 25) << PAGE_SHIFT) / EP_ITEM_COST;
	BUG_ON(max_user_watches < 0);

	/*
	 * We can have many thousands of epitems, so prevent this from
	 * using an extra cache line on 64-bit (and smaller) CPUs
	 */
	BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128);

	/* Allocates slab cache used to allocate "struct epitem" items */
	epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);

	/* Allocates slab cache used to allocate "struct eppoll_entry" */
	pwq_cache = kmem_cache_create("eventpoll_pwq",
		sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
	epoll_sysctls_init();

	ephead_cache = kmem_cache_create("ep_head",
		sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);

	return 0;
}
fs_initcall(eventpoll_init);
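/*
 * Illustration, not part of the kernel source above: a minimal userspace
 * sketch of the syscalls implemented in this file (epoll_create1(),
 * epoll_ctl() and epoll_wait()). Error handling is trimmed and "sock_fd"
 * is assumed to be an already-connected, pollable descriptor.
 */
#include <sys/epoll.h>
#include <unistd.h>

static void epoll_echo_loop(int sock_fd)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = sock_fd };
	struct epoll_event ready[16];
	char buf[512];
	int epfd, i, n;

	epfd = epoll_create1(EPOLL_CLOEXEC);		/* do_epoll_create() */
	epoll_ctl(epfd, EPOLL_CTL_ADD, sock_fd, &ev);	/* do_epoll_ctl()    */

	for (;;) {
		/* Blocks in ep_poll() until at least one event is ready. */
		n = epoll_wait(epfd, ready, 16, -1);
		for (i = 0; i < n; i++) {
			ssize_t len = read(ready[i].data.fd, buf, sizeof(buf));

			if (len <= 0)
				goto out;
			write(ready[i].data.fd, buf, len);
		}
	}
out:
	close(epfd);
}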
// SPDX-License-Identifier: GPL-2.0-only
/*
 * HWSIM IEEE 802.15.4 interface
 *
 * (C) 2018 Mojatau, Alexander Aring <aring@mojatau.com>
 * Copyright 2007-2012 Siemens AG
 *
 * Based on fakelb, original Written by:
 * Sergey Lapin <slapin@ossfans.org>
 * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
 */

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/platform_device.h>
#include <linux/rtnetlink.h>
#include <linux/netdevice.h>
#include <linux/device.h>
#include <linux/spinlock.h>
#include <net/ieee802154_netdev.h>
#include <net/mac802154.h>
#include <net/cfg802154.h>
#include <net/genetlink.h>
#include "mac802154_hwsim.h"

MODULE_DESCRIPTION("Software simulator of IEEE 802.15.4 radio(s) for mac802154");
MODULE_LICENSE("GPL");

static LIST_HEAD(hwsim_phys);
static DEFINE_MUTEX(hwsim_phys_lock);

static struct platform_device *mac802154hwsim_dev;

/* MAC802154_HWSIM netlink family */
static struct genl_family hwsim_genl_family;

static int hwsim_radio_idx;

enum hwsim_multicast_groups {
	HWSIM_MCGRP_CONFIG,
};

static const struct genl_multicast_group hwsim_mcgrps[] = {
	[HWSIM_MCGRP_CONFIG] = { .name = "config", },
};

struct hwsim_pib {
	u8 page;
	u8 channel;
	struct ieee802154_hw_addr_filt filt;
	enum ieee802154_filtering_level filt_level;

	struct rcu_head rcu;
};

struct hwsim_edge_info {
	u8 lqi;

	struct rcu_head rcu;
};

struct hwsim_edge {
	struct hwsim_phy *endpoint;
	struct hwsim_edge_info __rcu *info;

	struct list_head list;
	struct rcu_head rcu;
};

struct hwsim_phy {
	struct ieee802154_hw *hw;
	u32 idx;

	struct hwsim_pib __rcu *pib;

	bool suspended;
	struct list_head edges;

	struct list_head list;
};

static int hwsim_add_one(struct genl_info *info, struct device *dev,
			 bool init);
static void hwsim_del(struct hwsim_phy *phy);

static int hwsim_hw_ed(struct ieee802154_hw *hw, u8 *level)
{
	*level = 0xbe;

	return 0;
}

static int hwsim_update_pib(struct ieee802154_hw *hw, u8 page, u8 channel,
			    struct ieee802154_hw_addr_filt *filt,
			    enum ieee802154_filtering_level filt_level)
{
	struct hwsim_phy *phy = hw->priv;
	struct hwsim_pib *pib, *pib_old;

	pib = kzalloc(sizeof(*pib), GFP_ATOMIC);
	if (!pib)
		return -ENOMEM;

	pib_old = rtnl_dereference(phy->pib);

	pib->page = page;
	pib->channel = channel;
	pib->filt.short_addr = filt->short_addr;
	pib->filt.pan_id = filt->pan_id;
	pib->filt.ieee_addr = filt->ieee_addr;
	pib->filt.pan_coord = filt->pan_coord;
	pib->filt_level = filt_level;

	rcu_assign_pointer(phy->pib, pib);
	kfree_rcu(pib_old, rcu);

	return 0;
}
static int hwsim_hw_channel(struct ieee802154_hw *hw, u8 page, u8 channel) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, page, channel, &pib->filt, pib->filt_level); rcu_read_unlock(); return ret; } static int hwsim_hw_addr_filt(struct ieee802154_hw *hw, struct ieee802154_hw_addr_filt *filt, unsigned long changed) { struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; int ret; rcu_read_lock(); pib = rcu_dereference(phy->pib); ret = hwsim_update_pib(hw, pib->page, pib->channel, filt, pib->filt_level); rcu_read_unlock(); return ret; } static void hwsim_hw_receive(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi) { struct ieee802154_hdr hdr; struct hwsim_phy *phy = hw->priv; struct hwsim_pib *pib; rcu_read_lock(); pib = rcu_dereference(phy->pib); if (!pskb_may_pull(skb, 3)) { dev_dbg(hw->parent, "invalid frame\n"); goto drop; } memcpy(&hdr, skb->data, 3); /* Level 4 filtering: Frame fields validity */ if (pib->filt_level == IEEE802154_FILTERING_4_FRAME_FIELDS) { /* a) Drop reserved frame types */ switch (mac_cb(skb)->type) { case IEEE802154_FC_TYPE_BEACON: case IEEE802154_FC_TYPE_DATA: case IEEE802154_FC_TYPE_ACK: case IEEE802154_FC_TYPE_MAC_CMD: break; default: dev_dbg(hw->parent, "unrecognized frame type 0x%x\n", mac_cb(skb)->type); goto drop; } /* b) Drop reserved frame versions */ switch (hdr.fc.version) { case IEEE802154_2003_STD: case IEEE802154_2006_STD: case IEEE802154_STD: break; default: dev_dbg(hw->parent, "unrecognized frame version 0x%x\n", hdr.fc.version); goto drop; } /* c) PAN ID constraints */ if ((mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG || mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id && mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST)) { dev_dbg(hw->parent, "unrecognized PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } /* d1) Short address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_SHORT && mac_cb(skb)->dest.short_addr != pib->filt.short_addr && mac_cb(skb)->dest.short_addr != cpu_to_le16(IEEE802154_ADDR_BROADCAST)) { dev_dbg(hw->parent, "unrecognized short address %04x\n", le16_to_cpu(mac_cb(skb)->dest.short_addr)); goto drop; } /* d2) Extended address constraints */ if (mac_cb(skb)->dest.mode == IEEE802154_ADDR_LONG && mac_cb(skb)->dest.extended_addr != pib->filt.ieee_addr) { dev_dbg(hw->parent, "unrecognized long address 0x%016llx\n", mac_cb(skb)->dest.extended_addr); goto drop; } /* d4) Specific PAN coordinator case (no parent) */ if ((mac_cb(skb)->type == IEEE802154_FC_TYPE_DATA || mac_cb(skb)->type == IEEE802154_FC_TYPE_MAC_CMD) && mac_cb(skb)->dest.mode == IEEE802154_ADDR_NONE) { dev_dbg(hw->parent, "relaying is not supported\n"); goto drop; } /* e) Beacon frames follow specific PAN ID rules */ if (mac_cb(skb)->type == IEEE802154_FC_TYPE_BEACON && pib->filt.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST) && mac_cb(skb)->dest.pan_id != pib->filt.pan_id) { dev_dbg(hw->parent, "invalid beacon PAN ID %04x\n", le16_to_cpu(mac_cb(skb)->dest.pan_id)); goto drop; } } rcu_read_unlock(); ieee802154_rx_irqsafe(hw, skb, lqi); return; drop: rcu_read_unlock(); kfree_skb(skb); } static int hwsim_hw_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) { struct hwsim_phy *current_phy = hw->priv; struct hwsim_pib *current_pib, *endpoint_pib; struct hwsim_edge_info *einfo; struct hwsim_edge *e; WARN_ON(current_phy->suspended); rcu_read_lock(); current_pib = 
		rcu_dereference(current_phy->pib);

	list_for_each_entry_rcu(e, &current_phy->edges, list) {
		/* Can be changed later in rx_irqsafe, but this is only a
		 * performance tweak. Received radio should drop the frame
		 * in mac802154 stack anyway... so we don't need to be
		 * 100% of locking here to check on suspended
		 */
		if (e->endpoint->suspended)
			continue;

		endpoint_pib = rcu_dereference(e->endpoint->pib);

		if (current_pib->page == endpoint_pib->page &&
		    current_pib->channel == endpoint_pib->channel) {
			struct sk_buff *newskb = pskb_copy(skb, GFP_ATOMIC);

			einfo = rcu_dereference(e->info);
			if (newskb)
				hwsim_hw_receive(e->endpoint->hw, newskb,
						 einfo->lqi);
		}
	}
	rcu_read_unlock();

	ieee802154_xmit_complete(hw, skb, false);
	return 0;
}

static int hwsim_hw_start(struct ieee802154_hw *hw)
{
	struct hwsim_phy *phy = hw->priv;

	phy->suspended = false;

	return 0;
}

static void hwsim_hw_stop(struct ieee802154_hw *hw)
{
	struct hwsim_phy *phy = hw->priv;

	phy->suspended = true;
}

static int hwsim_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
{
	enum ieee802154_filtering_level filt_level;
	struct hwsim_phy *phy = hw->priv;
	struct hwsim_pib *pib;
	int ret;

	if (on)
		filt_level = IEEE802154_FILTERING_NONE;
	else
		filt_level = IEEE802154_FILTERING_4_FRAME_FIELDS;

	rcu_read_lock();
	pib = rcu_dereference(phy->pib);
	ret = hwsim_update_pib(hw, pib->page, pib->channel, &pib->filt, filt_level);
	rcu_read_unlock();

	return ret;
}

static const struct ieee802154_ops hwsim_ops = {
	.owner = THIS_MODULE,
	.xmit_async = hwsim_hw_xmit,
	.ed = hwsim_hw_ed,
	.set_channel = hwsim_hw_channel,
	.start = hwsim_hw_start,
	.stop = hwsim_hw_stop,
	.set_promiscuous_mode = hwsim_set_promiscuous_mode,
	.set_hw_addr_filt = hwsim_hw_addr_filt,
};

static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
{
	return hwsim_add_one(info, &mac802154hwsim_dev->dev, false);
}

static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
{
	struct hwsim_phy *phy, *tmp;
	s64 idx = -1;

	if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID])
		return -EINVAL;

	idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]);

	mutex_lock(&hwsim_phys_lock);
	list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) {
		if (idx == phy->idx) {
			hwsim_del(phy);
			mutex_unlock(&hwsim_phys_lock);
			return 0;
		}
	}
	mutex_unlock(&hwsim_phys_lock);

	return -ENODEV;
}

static int append_radio_msg(struct sk_buff *skb, struct hwsim_phy *phy)
{
	struct nlattr *nl_edges, *nl_edge;
	struct hwsim_edge_info *einfo;
	struct hwsim_edge *e;
	int ret;

	ret = nla_put_u32(skb, MAC802154_HWSIM_ATTR_RADIO_ID, phy->idx);
	if (ret < 0)
		return ret;

	rcu_read_lock();
	if (list_empty(&phy->edges)) {
		rcu_read_unlock();
		return 0;
	}

	nl_edges = nla_nest_start_noflag(skb,
					 MAC802154_HWSIM_ATTR_RADIO_EDGES);
	if (!nl_edges) {
		rcu_read_unlock();
		return -ENOBUFS;
	}

	list_for_each_entry_rcu(e, &phy->edges, list) {
		nl_edge = nla_nest_start_noflag(skb,
						MAC802154_HWSIM_ATTR_RADIO_EDGE);
		if (!nl_edge) {
			rcu_read_unlock();
			nla_nest_cancel(skb, nl_edges);
			return -ENOBUFS;
		}

		ret = nla_put_u32(skb, MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID,
				  e->endpoint->idx);
		if (ret < 0) {
			rcu_read_unlock();
			nla_nest_cancel(skb, nl_edge);
			nla_nest_cancel(skb, nl_edges);
			return ret;
		}

		einfo = rcu_dereference(e->info);
		ret = nla_put_u8(skb, MAC802154_HWSIM_EDGE_ATTR_LQI,
				 einfo->lqi);
		if (ret < 0) {
			rcu_read_unlock();
			nla_nest_cancel(skb, nl_edge);
			nla_nest_cancel(skb, nl_edges);
			return ret;
		}

		nla_nest_end(skb, nl_edge);
	}
	rcu_read_unlock();

	nla_nest_end(skb, nl_edges);

	return 0;
}

static int hwsim_get_radio(struct sk_buff *skb, struct hwsim_phy *phy, u32
portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; int res; hdr = genlmsg_put(skb, portid, seq, &hwsim_genl_family, flags, MAC802154_HWSIM_CMD_GET_RADIO); if (!hdr) return -EMSGSIZE; if (cb) genl_dump_check_consistent(cb, hdr); res = append_radio_msg(skb, phy); if (res < 0) goto out_err; genlmsg_end(skb, hdr); return 0; out_err: genlmsg_cancel(skb, hdr); return res; } static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_phy *phy; struct sk_buff *skb; int idx, res = -ENODEV; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]) return -EINVAL; idx = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); mutex_lock(&hwsim_phys_lock); list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx != idx) continue; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) { res = -ENOMEM; goto out_err; } res = hwsim_get_radio(skb, phy, info->snd_portid, info->snd_seq, NULL, 0); if (res < 0) { nlmsg_free(skb); goto out_err; } res = genlmsg_reply(skb, info); break; } out_err: mutex_unlock(&hwsim_phys_lock); return res; } static int hwsim_dump_radio_nl(struct sk_buff *skb, struct netlink_callback *cb) { int idx = cb->args[0]; struct hwsim_phy *phy; int res; mutex_lock(&hwsim_phys_lock); if (idx == hwsim_radio_idx) goto done; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx < idx) continue; res = hwsim_get_radio(skb, phy, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (res < 0) break; idx = phy->idx + 1; } cb->args[0] = idx; done: mutex_unlock(&hwsim_phys_lock); return skb->len; } /* caller need to held hwsim_phys_lock */ static struct hwsim_phy *hwsim_get_radio_by_id(uint32_t idx) { struct hwsim_phy *phy; list_for_each_entry(phy, &hwsim_phys, list) { if (phy->idx == idx) return phy; } return NULL; } static const struct nla_policy hwsim_edge_policy[MAC802154_HWSIM_EDGE_ATTR_MAX + 1] = { [MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_EDGE_ATTR_LQI] = { .type = NLA_U8 }, }; static struct hwsim_edge *hwsim_alloc_edge(struct hwsim_phy *endpoint, u8 lqi) { struct hwsim_edge_info *einfo; struct hwsim_edge *e; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) return NULL; einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { kfree(e); return NULL; } einfo->lqi = 0xff; rcu_assign_pointer(e->info, einfo); e->endpoint = endpoint; return e; } static void hwsim_free_edge(struct hwsim_edge *e) { struct hwsim_edge_info *einfo; rcu_read_lock(); einfo = rcu_dereference(e->info); rcu_read_unlock(); kfree_rcu(einfo, rcu); kfree_rcu(e, rcu); } static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0, *phy_v1; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); if (v0 == v1) return -EINVAL; mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } phy_v1 = hwsim_get_radio_by_id(v1); if (!phy_v1) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); 
list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { mutex_unlock(&hwsim_phys_lock); rcu_read_unlock(); return -EEXIST; } } rcu_read_unlock(); e = hwsim_alloc_edge(phy_v1, 0xff); if (!e) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } list_add_rcu(&e->list, &phy_v0->edges); /* wait until changes are done under hwsim_phys_lock lock * should prevent of calling this function twice while * edges list has not the changes yet. */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { rcu_read_unlock(); list_del_rcu(&e->list); hwsim_free_edge(e); /* same again - wait until list changes are done */ synchronize_rcu(); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; struct hwsim_edge_info *einfo, *einfo_old; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; u8 lqi; if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] || !edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]) return -EINVAL; v0 = nla_get_u32(info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID]); v1 = nla_get_u32(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID]); lqi = nla_get_u8(edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]); mutex_lock(&hwsim_phys_lock); phy_v0 = hwsim_get_radio_by_id(v0); if (!phy_v0) { mutex_unlock(&hwsim_phys_lock); return -ENOENT; } einfo = kzalloc(sizeof(*einfo), GFP_KERNEL); if (!einfo) { mutex_unlock(&hwsim_phys_lock); return -ENOMEM; } rcu_read_lock(); list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { einfo->lqi = lqi; einfo_old = rcu_replace_pointer(e->info, einfo, lockdep_is_held(&hwsim_phys_lock)); rcu_read_unlock(); kfree_rcu(einfo_old, rcu); mutex_unlock(&hwsim_phys_lock); return 0; } } rcu_read_unlock(); kfree(einfo); mutex_unlock(&hwsim_phys_lock); return -ENOENT; } /* MAC802154_HWSIM netlink policy */ static const struct nla_policy hwsim_genl_policy[MAC802154_HWSIM_ATTR_MAX + 1] = { [MAC802154_HWSIM_ATTR_RADIO_ID] = { .type = NLA_U32 }, [MAC802154_HWSIM_ATTR_RADIO_EDGE] = { .type = NLA_NESTED }, [MAC802154_HWSIM_ATTR_RADIO_EDGES] = { .type = NLA_NESTED }, }; /* Generic Netlink operations array */ static const struct genl_small_ops hwsim_nl_ops[] = { { .cmd = 
MAC802154_HWSIM_CMD_NEW_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_radio_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_GET_RADIO, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_get_radio_nl, .dumpit = hwsim_dump_radio_nl, }, { .cmd = MAC802154_HWSIM_CMD_NEW_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_new_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_DEL_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_del_edge_nl, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MAC802154_HWSIM_CMD_SET_EDGE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = hwsim_set_edge_lqi, .flags = GENL_UNS_ADMIN_PERM, }, }; static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC802154_HWSIM", .version = 1, .maxattr = MAC802154_HWSIM_ATTR_MAX, .policy = hwsim_genl_policy, .module = THIS_MODULE, .small_ops = hwsim_nl_ops, .n_small_ops = ARRAY_SIZE(hwsim_nl_ops), .resv_start_op = MAC802154_HWSIM_CMD_NEW_EDGE + 1, .mcgrps = hwsim_mcgrps, .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; static void hwsim_mcast_config_msg(struct sk_buff *mcast_skb, struct genl_info *info) { if (info) genl_notify(&hwsim_genl_family, mcast_skb, info, HWSIM_MCGRP_CONFIG, GFP_KERNEL); else genlmsg_multicast(&hwsim_genl_family, mcast_skb, 0, HWSIM_MCGRP_CONFIG, GFP_KERNEL); } static void hwsim_mcast_new_radio(struct genl_info *info, struct hwsim_phy *phy) { struct sk_buff *mcast_skb; void *data; mcast_skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!mcast_skb) return; data = genlmsg_put(mcast_skb, 0, 0, &hwsim_genl_family, 0, MAC802154_HWSIM_CMD_NEW_RADIO); if (!data) goto out_err; if (append_radio_msg(mcast_skb, phy) < 0) goto out_err; genlmsg_end(mcast_skb, data); hwsim_mcast_config_msg(mcast_skb, info); return; out_err: genlmsg_cancel(mcast_skb, data); nlmsg_free(mcast_skb); } static void hwsim_edge_unsubscribe_me(struct hwsim_phy *phy) { struct hwsim_phy *tmp; struct hwsim_edge *e; rcu_read_lock(); /* going to all phy edges and remove phy from it */ list_for_each_entry(tmp, &hwsim_phys, list) { list_for_each_entry_rcu(e, &tmp->edges, list) { if (e->endpoint->idx == phy->idx) { list_del_rcu(&e->list); hwsim_free_edge(e); } } } rcu_read_unlock(); synchronize_rcu(); } static int hwsim_subscribe_all_others(struct hwsim_phy *phy) { struct hwsim_phy *sub; struct hwsim_edge *e; list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(sub, 0xff); if (!e) goto me_fail; list_add_rcu(&e->list, &phy->edges); } list_for_each_entry(sub, &hwsim_phys, list) { e = hwsim_alloc_edge(phy, 0xff); if (!e) goto sub_fail; list_add_rcu(&e->list, &sub->edges); } return 0; sub_fail: hwsim_edge_unsubscribe_me(phy); me_fail: rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } rcu_read_unlock(); return -ENOMEM; } static int hwsim_add_one(struct genl_info *info, struct device *dev, bool init) { struct ieee802154_hw *hw; struct hwsim_phy *phy; struct hwsim_pib *pib; int idx; int err; idx = hwsim_radio_idx++; hw = ieee802154_alloc_hw(sizeof(*phy), &hwsim_ops); if (!hw) return -ENOMEM; phy = hw->priv; phy->hw = hw; /* 868 MHz BPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 1; /* 915 MHz BPSK 
802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7fe; /* 2.4 GHz O-QPSK 802.15.4-2003 */ hw->phy->supported.channels[0] |= 0x7FFF800; /* 868 MHz ASK 802.15.4-2006 */ hw->phy->supported.channels[1] |= 1; /* 915 MHz ASK 802.15.4-2006 */ hw->phy->supported.channels[1] |= 0x7fe; /* 868 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 1; /* 915 MHz O-QPSK 802.15.4-2006 */ hw->phy->supported.channels[2] |= 0x7fe; /* 2.4 GHz CSS 802.15.4a-2007 */ hw->phy->supported.channels[3] |= 0x3fff; /* UWB Sub-gigahertz 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 1; /* UWB Low band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0x1e; /* UWB High band 802.15.4a-2007 */ hw->phy->supported.channels[4] |= 0xffe0; /* 750 MHz O-QPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf; /* 750 MHz MPSK 802.15.4c-2009 */ hw->phy->supported.channels[5] |= 0xf0; /* 950 MHz BPSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ff; /* 950 MHz GFSK 802.15.4d-2009 */ hw->phy->supported.channels[6] |= 0x3ffc00; ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); /* hwsim phy channel 13 as default */ hw->phy->current_channel = 13; pib = kzalloc(sizeof(*pib), GFP_KERNEL); if (!pib) { err = -ENOMEM; goto err_pib; } pib->channel = 13; pib->filt.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); pib->filt.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); hw->flags = IEEE802154_HW_PROMISCUOUS; hw->parent = dev; err = ieee802154_register_hw(hw); if (err) goto err_reg; mutex_lock(&hwsim_phys_lock); if (init) { err = hwsim_subscribe_all_others(phy); if (err < 0) { mutex_unlock(&hwsim_phys_lock); goto err_subscribe; } } list_add_tail(&phy->list, &hwsim_phys); mutex_unlock(&hwsim_phys_lock); hwsim_mcast_new_radio(info, phy); return idx; err_subscribe: ieee802154_unregister_hw(phy->hw); err_reg: kfree(pib); err_pib: ieee802154_free_hw(phy->hw); return err; } static void hwsim_del(struct hwsim_phy *phy) { struct hwsim_pib *pib; struct hwsim_edge *e; hwsim_edge_unsubscribe_me(phy); list_del(&phy->list); rcu_read_lock(); list_for_each_entry_rcu(e, &phy->edges, list) { list_del_rcu(&e->list); hwsim_free_edge(e); } pib = rcu_dereference(phy->pib); rcu_read_unlock(); kfree_rcu(pib, rcu); ieee802154_unregister_hw(phy->hw); ieee802154_free_hw(phy->hw); } static int hwsim_probe(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; int err, i; for (i = 0; i < 2; i++) { err = hwsim_add_one(NULL, &pdev->dev, true); if (err < 0) goto err_slave; } dev_info(&pdev->dev, "Added 2 mac802154 hwsim hardware radios\n"); return 0; err_slave: mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); return err; } static void hwsim_remove(struct platform_device *pdev) { struct hwsim_phy *phy, *tmp; mutex_lock(&hwsim_phys_lock); list_for_each_entry_safe(phy, tmp, &hwsim_phys, list) hwsim_del(phy); mutex_unlock(&hwsim_phys_lock); } static struct platform_driver mac802154hwsim_driver = { .probe = hwsim_probe, .remove = hwsim_remove, .driver = { .name = "mac802154_hwsim", }, }; static __init int hwsim_init_module(void) { int rc; rc = genl_register_family(&hwsim_genl_family); if (rc) return rc; mac802154hwsim_dev = platform_device_register_simple("mac802154_hwsim", -1, NULL, 0); if (IS_ERR(mac802154hwsim_dev)) { rc = PTR_ERR(mac802154hwsim_dev); goto platform_dev; } rc = platform_driver_register(&mac802154hwsim_driver); if (rc < 0) goto platform_drv; 
	return 0;

platform_drv:
	platform_device_unregister(mac802154hwsim_dev);
platform_dev:
	genl_unregister_family(&hwsim_genl_family);
	return rc;
}

static __exit void hwsim_remove_module(void)
{
	genl_unregister_family(&hwsim_genl_family);
	platform_driver_unregister(&mac802154hwsim_driver);
	platform_device_unregister(mac802154hwsim_dev);
}

module_init(hwsim_init_module);
module_exit(hwsim_remove_module);
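/*
 * Editor's illustrative sketch (not part of the driver): one way the
 * "MAC802154_HWSIM" generic netlink family registered above could be
 * exercised from userspace with libnl-genl-3, here asking the kernel to
 * create one more simulated radio (MAC802154_HWSIM_CMD_NEW_RADIO).  It
 * assumes a userspace copy of the driver's mac802154_hwsim.h enums is on
 * the include path and that the caller has CAP_NET_ADMIN; error handling
 * is intentionally minimal.  The same pattern extends to the edge
 * commands by nesting MAC802154_HWSIM_ATTR_RADIO_EDGE attributes.
 */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>

#include "mac802154_hwsim.h"	/* assumed userspace copy of the kernel enums */

int main(void)
{
	struct nl_sock *sk;
	struct nl_msg *msg;
	int family, err = 1;

	sk = nl_socket_alloc();
	if (!sk)
		return 1;
	if (genl_connect(sk) < 0)
		goto out_free_sock;

	/* Look up the numeric id of the family registered by hwsim_genl_family. */
	family = genl_ctrl_resolve(sk, "MAC802154_HWSIM");
	if (family < 0)
		goto out_free_sock;

	msg = nlmsg_alloc();
	if (!msg)
		goto out_free_sock;

	/* NEW_RADIO needs no attributes; the driver picks the radio index. */
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    MAC802154_HWSIM_CMD_NEW_RADIO, 1);

	if (nl_send_auto(sk, msg) >= 0 && nl_wait_for_ack(sk) == 0)
		err = 0;

	nlmsg_free(msg);
out_free_sock:
	nl_socket_free(sk);
	return err;
}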
// SPDX-License-Identifier: GPL-1.0+
/*
 * n_tty.c --- implements the N_TTY line discipline.
 *
 * This code used to be in tty_io.c, but things are getting hairy
 * enough that it made sense to split things off. (The N_TTY
 * processing has changed so much that it's hardly recognizable,
 * anyway...)
 *
 * Note that the open routine for N_TTY is guaranteed never to return
 * an error. This is because Linux will fall back to setting a line
 * to N_TTY if it can not switch to any other line discipline.
 *
 * Written by Theodore Ts'o, Copyright 1994.
 *
 * This file also contains code originally written by Linus Torvalds,
 * Copyright 1991, 1992, 1993, and by Julian Cowley, Copyright 1994.
 *
 * Reduced memory usage for older ARM systems - Russell King.
 *
 * 2000/01/20	Fixed SMP locking on put_tty_queue using bits of
 *		the patch by Andrew J. Kroll <ag784@freenet.buffalo.edu>
 *		who actually finally proved there really was a race.
 *
 * 2002/03/18	Implemented n_tty_wakeup to send SIGIO POLL_OUTs to
 *		waiting writing processes-Sapan Bhatia <sapan@corewars.org>.
* Also fixed a bug in BLOCKING mode where n_tty_write returns * EAGAIN */ #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/fcntl.h> #include <linux/file.h> #include <linux/jiffies.h> #include <linux/math.h> #include <linux/poll.h> #include <linux/ratelimit.h> #include <linux/sched.h> #include <linux/signal.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/tty.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include "tty.h" /* * Until this number of characters is queued in the xmit buffer, select will * return "we have room for writes". */ #define WAKEUP_CHARS 256 /* * This defines the low- and high-watermarks for throttling and * unthrottling the TTY driver. These watermarks are used for * controlling the space in the read buffer. */ #define TTY_THRESHOLD_THROTTLE 128 /* now based on remaining room */ #define TTY_THRESHOLD_UNTHROTTLE 128 /* * Special byte codes used in the echo buffer to represent operations * or special handling of characters. Bytes in the echo buffer that * are not part of such special blocks are treated as normal character * codes. */ #define ECHO_OP_START 0xff #define ECHO_OP_MOVE_BACK_COL 0x80 #define ECHO_OP_SET_CANON_COL 0x81 #define ECHO_OP_ERASE_TAB 0x82 #define ECHO_COMMIT_WATERMARK 256 #define ECHO_BLOCK 256 #define ECHO_DISCARD_WATERMARK N_TTY_BUF_SIZE - (ECHO_BLOCK + 32) #undef N_TTY_TRACE #ifdef N_TTY_TRACE # define n_tty_trace(f, args...) trace_printk(f, ##args) #else # define n_tty_trace(f, args...) no_printk(f, ##args) #endif struct n_tty_data { /* producer-published */ size_t read_head; size_t commit_head; size_t canon_head; size_t echo_head; size_t echo_commit; size_t echo_mark; DECLARE_BITMAP(char_map, 256); /* private to n_tty_receive_overrun (single-threaded) */ unsigned long overrun_time; unsigned int num_overrun; /* non-atomic */ bool no_room; /* must hold exclusive termios_rwsem to reset these */ unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; unsigned char push:1; /* shared by producer and consumer */ u8 read_buf[N_TTY_BUF_SIZE]; DECLARE_BITMAP(read_flags, N_TTY_BUF_SIZE); u8 echo_buf[N_TTY_BUF_SIZE]; /* consumer-published */ size_t read_tail; size_t line_start; /* # of chars looked ahead (to find software flow control chars) */ size_t lookahead_count; /* protected by output lock */ unsigned int column; unsigned int canon_column; size_t echo_tail; struct mutex atomic_read_lock; struct mutex output_lock; }; #define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1)) static inline size_t read_cnt(struct n_tty_data *ldata) { return ldata->read_head - ldata->read_tail; } static inline u8 read_buf(struct n_tty_data *ldata, size_t i) { return ldata->read_buf[MASK(i)]; } static inline u8 *read_buf_addr(struct n_tty_data *ldata, size_t i) { return &ldata->read_buf[MASK(i)]; } static inline u8 echo_buf(struct n_tty_data *ldata, size_t i) { smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). 
*/ return ldata->echo_buf[MASK(i)]; } static inline u8 *echo_buf_addr(struct n_tty_data *ldata, size_t i) { return &ldata->echo_buf[MASK(i)]; } /* If we are not echoing the data, perhaps this is a secret so erase it */ static void zero_buffer(const struct tty_struct *tty, u8 *buffer, size_t size) { if (L_ICANON(tty) && !L_ECHO(tty)) memset(buffer, 0, size); } static void tty_copy(const struct tty_struct *tty, void *to, size_t tail, size_t n) { struct n_tty_data *ldata = tty->disc_data; size_t size = N_TTY_BUF_SIZE - tail; void *from = read_buf_addr(ldata, tail); if (n > size) { tty_audit_add_data(tty, from, size); memcpy(to, from, size); zero_buffer(tty, from, size); to += size; n -= size; from = ldata->read_buf; } tty_audit_add_data(tty, from, n); memcpy(to, from, n); zero_buffer(tty, from, n); } /** * n_tty_kick_worker - start input worker (if required) * @tty: terminal * * Re-schedules the flip buffer work if it may have stopped. * * Locking: * * Caller holds exclusive %termios_rwsem, or * * n_tty_read()/consumer path: * holds non-exclusive %termios_rwsem */ static void n_tty_kick_worker(const struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; /* Did the input worker stop? Restart it */ if (unlikely(READ_ONCE(ldata->no_room))) { WRITE_ONCE(ldata->no_room, 0); WARN_RATELIMIT(tty->port->itty == NULL, "scheduling with invalid itty\n"); /* see if ldisc has been killed - if so, this means that * even though the ldisc has been halted and ->buf.work * cancelled, ->buf.work is about to be rescheduled */ WARN_RATELIMIT(test_bit(TTY_LDISC_HALTED, &tty->flags), "scheduling buffer work for halted ldisc\n"); tty_buffer_restart_work(tty->port); } } static ssize_t chars_in_buffer(const struct tty_struct *tty) { const struct n_tty_data *ldata = tty->disc_data; size_t head = ldata->icanon ? ldata->canon_head : ldata->commit_head; return head - ldata->read_tail; } /** * n_tty_write_wakeup - asynchronous I/O notifier * @tty: tty device * * Required for the ptys, serial driver etc. since processes that attach * themselves to the master and rely on ASYNC IO must be woken up. */ static void n_tty_write_wakeup(struct tty_struct *tty) { clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); kill_fasync(&tty->fasync, SIGIO, POLL_OUT); } static void n_tty_check_throttle(struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; /* * Check the remaining room for the input canonicalization * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ if (ldata->icanon && ldata->canon_head == ldata->read_tail) return; do { tty_set_flow_change(tty, TTY_THROTTLE_SAFE); if (N_TTY_BUF_SIZE - read_cnt(ldata) >= TTY_THRESHOLD_THROTTLE) break; } while (!tty_throttle_safe(tty)); __tty_set_flow_change(tty, 0); } static void n_tty_check_unthrottle(struct tty_struct *tty) { if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) return; n_tty_kick_worker(tty); tty_wakeup(tty->link); return; } /* If there is enough space in the read buffer now, let the * low-level driver know. We use chars_in_buffer() to * check the buffer, as it now knows about canonical mode. * Otherwise, if the driver is throttled and the line is * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. 
*/ do { tty_set_flow_change(tty, TTY_UNTHROTTLE_SAFE); if (chars_in_buffer(tty) > TTY_THRESHOLD_UNTHROTTLE) break; n_tty_kick_worker(tty); } while (!tty_unthrottle_safe(tty)); __tty_set_flow_change(tty, 0); } /** * put_tty_queue - add character to tty * @c: character * @ldata: n_tty data * * Add a character to the tty read_buf queue. * * Locking: * * n_tty_receive_buf()/producer path: * caller holds non-exclusive %termios_rwsem */ static inline void put_tty_queue(u8 c, struct n_tty_data *ldata) { *read_buf_addr(ldata, ldata->read_head) = c; ldata->read_head++; } /** * reset_buffer_flags - reset buffer state * @ldata: line disc data to reset * * Reset the read buffer counters and clear the flags. Called from * n_tty_open() and n_tty_flush_buffer(). * * Locking: * * caller holds exclusive %termios_rwsem, or * * (locking is not required) */ static void reset_buffer_flags(struct n_tty_data *ldata) { ldata->read_head = ldata->canon_head = ldata->read_tail = 0; ldata->commit_head = 0; ldata->line_start = 0; ldata->erasing = 0; bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE); ldata->push = 0; ldata->lookahead_count = 0; } static void n_tty_packet_mode_flush(struct tty_struct *tty) { unsigned long flags; if (tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus |= TIOCPKT_FLUSHREAD; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible(&tty->link->read_wait); } } /** * n_tty_flush_buffer - clean input queue * @tty: terminal device * * Flush the input buffer. Called when the tty layer wants the buffer flushed * (eg at hangup) or when the %N_TTY line discipline internally has to clean * the pending queue (for example some signals). * * Holds %termios_rwsem to exclude producer/consumer while buffer indices are * reset. * * Locking: %ctrl.lock, exclusive %termios_rwsem */ static void n_tty_flush_buffer(struct tty_struct *tty) { down_write(&tty->termios_rwsem); reset_buffer_flags(tty->disc_data); n_tty_kick_worker(tty); if (tty->link) n_tty_packet_mode_flush(tty); up_write(&tty->termios_rwsem); } /** * is_utf8_continuation - utf8 multibyte check * @c: byte to check * * Returns: true if the utf8 character @c is a multibyte continuation * character. We use this to correctly compute the on-screen size of the * character when printing. */ static inline int is_utf8_continuation(u8 c) { return (c & 0xc0) == 0x80; } /** * is_continuation - multibyte check * @c: byte to check * @tty: terminal device * * Returns: true if the utf8 character @c is a multibyte continuation character * and the terminal is in unicode mode. */ static inline int is_continuation(u8 c, const struct tty_struct *tty) { return I_IUTF8(tty) && is_utf8_continuation(c); } /** * do_output_char - output one character * @c: character (or partial unicode symbol) * @tty: terminal device * @space: space available in tty driver write buffer * * This is a helper function that handles one output character (including * special characters like TAB, CR, LF, etc.), doing OPOST processing and * putting the results in the tty driver's write buffer. * * Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. * They simply aren't relevant in the world today. If you ever need them, add * them here. * * Returns: the number of bytes of buffer space used or -1 if no space left. * * Locking: should be called under the %output_lock to protect the column state * and space left in the buffer. 
*/ static int do_output_char(u8 c, struct tty_struct *tty, int space) { struct n_tty_data *ldata = tty->disc_data; int spaces; if (!space) return -1; switch (c) { case '\n': if (O_ONLRET(tty)) ldata->column = 0; if (O_ONLCR(tty)) { if (space < 2) return -1; ldata->canon_column = ldata->column = 0; tty->ops->write(tty, "\r\n", 2); return 2; } ldata->canon_column = ldata->column; break; case '\r': if (O_ONOCR(tty) && ldata->column == 0) return 0; if (O_OCRNL(tty)) { c = '\n'; if (O_ONLRET(tty)) ldata->canon_column = ldata->column = 0; break; } ldata->canon_column = ldata->column = 0; break; case '\t': spaces = 8 - (ldata->column & 7); if (O_TABDLY(tty) == XTABS) { if (space < spaces) return -1; ldata->column += spaces; tty->ops->write(tty, " ", spaces); return spaces; } ldata->column += spaces; break; case '\b': if (ldata->column > 0) ldata->column--; break; default: if (!iscntrl(c)) { if (O_OLCUC(tty)) c = toupper(c); if (!is_continuation(c, tty)) ldata->column++; } break; } tty_put_char(tty, c); return 1; } /** * process_output - output post processor * @c: character (or partial unicode symbol) * @tty: terminal device * * Output one character with OPOST processing. * * Returns: -1 when the output device is full and the character must be * retried. * * Locking: %output_lock to protect column state and space left (also, this is *called from n_tty_write() under the tty layer write lock). */ static int process_output(u8 c, struct tty_struct *tty) { struct n_tty_data *ldata = tty->disc_data; int space, retval; mutex_lock(&ldata->output_lock); space = tty_write_room(tty); retval = do_output_char(c, tty, space); mutex_unlock(&ldata->output_lock); if (retval < 0) return -1; else return 0; } /** * process_output_block - block post processor * @tty: terminal device * @buf: character buffer * @nr: number of bytes to output * * Output a block of characters with OPOST processing. * * This path is used to speed up block console writes, among other things when * processing blocks of output data. It handles only the simple cases normally * found and helps to generate blocks of symbols for the console driver and * thus improve performance. * * Returns: the number of characters output. * * Locking: %output_lock to protect column state and space left (also, this is * called from n_tty_write() under the tty layer write lock). 
*/ static ssize_t process_output_block(struct tty_struct *tty, const u8 *buf, unsigned int nr) { struct n_tty_data *ldata = tty->disc_data; int space; int i; const u8 *cp; mutex_lock(&ldata->output_lock); space = tty_write_room(tty); if (space <= 0) { mutex_unlock(&ldata->output_lock); return space; } if (nr > space) nr = space; for (i = 0, cp = buf; i < nr; i++, cp++) { u8 c = *cp; switch (c) { case '\n': if (O_ONLRET(tty)) ldata->column = 0; if (O_ONLCR(tty)) goto break_out; ldata->canon_column = ldata->column; break; case '\r': if (O_ONOCR(tty) && ldata->column == 0) goto break_out; if (O_OCRNL(tty)) goto break_out; ldata->canon_column = ldata->column = 0; break; case '\t': goto break_out; case '\b': if (ldata->column > 0) ldata->column--; break; default: if (!iscntrl(c)) { if (O_OLCUC(tty)) goto break_out; if (!is_continuation(c, tty)) ldata->column++; } break; } } break_out: i = tty->ops->write(tty, buf, i); mutex_unlock(&ldata->output_lock); return i; } static int n_tty_process_echo_ops(struct tty_struct *tty, size_t *tail, int space) { struct n_tty_data *ldata = tty->disc_data; u8 op; /* * Since add_echo_byte() is called without holding output_lock, we * might see only portion of multi-byte operation. */ if (MASK(ldata->echo_commit) == MASK(*tail + 1)) return -ENODATA; /* * If the buffer byte is the start of a multi-byte operation, get the * next byte, which is either the op code or a control character value. */ op = echo_buf(ldata, *tail + 1); switch (op) { case ECHO_OP_ERASE_TAB: { unsigned int num_chars, num_bs; if (MASK(ldata->echo_commit) == MASK(*tail + 2)) return -ENODATA; num_chars = echo_buf(ldata, *tail + 2); /* * Determine how many columns to go back in order to erase the * tab. This depends on the number of columns used by other * characters within the tab area. If this (modulo 8) count is * from the start of input rather than from a previous tab, we * offset by canon column. Otherwise, tab spacing is normal. */ if (!(num_chars & 0x80)) num_chars += ldata->canon_column; num_bs = 8 - (num_chars & 7); if (num_bs > space) return -ENOSPC; space -= num_bs; while (num_bs--) { tty_put_char(tty, '\b'); if (ldata->column > 0) ldata->column--; } *tail += 3; break; } case ECHO_OP_SET_CANON_COL: ldata->canon_column = ldata->column; *tail += 2; break; case ECHO_OP_MOVE_BACK_COL: if (ldata->column > 0) ldata->column--; *tail += 2; break; case ECHO_OP_START: /* This is an escaped echo op start code */ if (!space) return -ENOSPC; tty_put_char(tty, ECHO_OP_START); ldata->column++; space--; *tail += 2; break; default: /* * If the op is not a special byte code, it is a ctrl char * tagged to be echoed as "^X" (where X is the letter * representing the control char). Note that we must ensure * there is enough space for the whole ctrl pair. */ if (space < 2) return -ENOSPC; tty_put_char(tty, '^'); tty_put_char(tty, op ^ 0100); ldata->column += 2; space -= 2; *tail += 2; break; } return space; } /** * __process_echoes - write pending echo characters * @tty: terminal device * * Write previously buffered echo (and other ldisc-generated) characters to the * tty. * * Characters generated by the ldisc (including echoes) need to be buffered * because the driver's write buffer can fill during heavy program output. * Echoing straight to the driver will often fail under these conditions, * causing lost characters and resulting mismatches of ldisc state information. 
 *
 * Since the ldisc state must represent the characters actually sent to the
 * driver at the time of the write, operations like certain changes in column
 * state are also saved in the buffer and executed here.
 *
 * A circular fifo buffer is used so that the most recent characters are
 * prioritized. Also, when control characters are echoed with a prefixed "^",
 * the pair is treated atomically and thus not separated.
 *
 * Locking: callers must hold %output_lock.
 */
static size_t __process_echoes(struct tty_struct *tty)
{
	struct n_tty_data *ldata = tty->disc_data;
	int space, old_space;
	size_t tail;
	u8 c;

	old_space = space = tty_write_room(tty);

	tail = ldata->echo_tail;
	while (MASK(ldata->echo_commit) != MASK(tail)) {
		c = echo_buf(ldata, tail);
		if (c == ECHO_OP_START) {
			int ret = n_tty_process_echo_ops(tty, &tail, space);

			if (ret == -ENODATA)
				goto not_yet_stored;
			if (ret < 0)
				break;